From: <lor...@us...> - 2011-03-18 09:35:31
|
Revision: 2726 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2726&view=rev Author: lorenz_b Date: 2011-03-18 09:35:21 +0000 (Fri, 18 Mar 2011) Log Message: ----------- Integrated template based SPARQL query learning algorithm. Modified Paths: -------------- trunk/components-ext/pom.xml Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DUDE2UDRS_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Case.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Feature.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Gender.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/LexicalSelection.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/MorphologicalProperty.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Numerus.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Prep_de.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Category.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/LTAG_Tree_Constructor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TerminalNode.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Tree.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TreeNode.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/AdjunctionPointer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/DerivationTree.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/DerivedTree.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAGLexicon.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LeftCompletor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LeftPredictor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/MoveDotDown.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/MoveDotUp.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Operation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/OperationPointer.java 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/OperationType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/ParseGrammar.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/ParseState.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/ParserOperation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/RightCompletor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/RightPredictor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Scanner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/SubstCompletor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/SubstPredictor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/SubstitutionPointer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/TAG.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserConstants.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserTokenManager.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAG_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ParseException.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/SimpleCharStream.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/Token.java 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/TokenMgrError.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/Complex_DRS_Condition.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Condition.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Quantifier.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DiscourseReferent.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/Negated_DRS.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/Simple_DRS_Condition.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/UDRS.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Argument.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/DUDE_Constructor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Restriction.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/ParseException.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/SimpleCharStream.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/Token.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/TokenMgrError.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/CompositeType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/DomType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/DominanceConstraint.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/ElemType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/ElementaryType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/Label.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/Pair.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/Position.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/SemanticRepresentation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/SortalRestriction.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/util/Type.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Aggregate.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Entity.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Filter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Negation.java 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_OrderBy.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_PairType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Prefix.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_QueryType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java trunk/components-ext/src/main/resources/ trunk/components-ext/src/main/resources/log4j.properties trunk/components-ext/src/main/resources/tbsl/ trunk/components-ext/src/main/resources/tbsl/lexicon/ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/models/ trunk/components-ext/src/main/resources/tbsl/models/README-Models.txt trunk/components-ext/src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger 
trunk/components-ext/src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger.props trunk/components-ext/src/main/resources/tbsl/models/left3words-wsj-0-18.tagger trunk/components-ext/src/main/resources/tbsl/models/left3words-wsj-0-18.tagger.props trunk/components-ext/src/test/java/org/dllearner/algorithm/ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/WordNetTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-03-17 11:24:56 UTC (rev 2725) +++ trunk/components-ext/pom.xml 2011-03-18 09:35:21 UTC (rev 2726) @@ -18,5 +18,18 @@ <groupId>org.dllearner</groupId> <artifactId>components-core</artifactId> </dependency> + <dependency> + <groupId>org.dllearner</groupId> + <artifactId>autosparql</artifactId> + <version>1.0-SNAPSHOT</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>com.jamonapi</groupId> + <artifactId>jamon</artifactId> + <version>2.7</version> + <type>jar</type> + <scope>compile</scope> + </dependency> </dependencies> </project> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,47 @@ +package org.dllearner.algorithm.tbsl.cli; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.templator.Templator; + + +public class TestFrontend { + + public static void main(String[] 
args) { + + Templator templator = new Templator(); + + System.out.println("======= SPARQL Templator v0.1 ============="); + System.out.println("\nType ':q' to quit."); + + while (true) { + String s = getStringFromUser("input > ").trim(); + + if (s.equals(":q")) { + System.exit(0); + } + + Set<Template> temps = templator.buildTemplates(s); + + for (Template temp : temps) { + System.out.println(temp.toString()); + } + + } + } + + public static String getStringFromUser(String msg) { + String str = ""; + try { + System.out.println("\n===========================================\n"); + System.out.print(msg); + str = new BufferedReader(new InputStreamReader(System.in)).readLine(); + } catch (IOException e) { + } + return str; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,352 @@ +package org.dllearner.algorithm.tbsl.converter; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.Complex_DRS_Condition; +import org.dllearner.algorithm.tbsl.sem.drs.DRS; +import org.dllearner.algorithm.tbsl.sem.drs.DRS_Condition; +import org.dllearner.algorithm.tbsl.sem.drs.DRS_Quantifier; +import org.dllearner.algorithm.tbsl.sem.drs.DiscourseReferent; +import org.dllearner.algorithm.tbsl.sem.drs.Negated_DRS; +import org.dllearner.algorithm.tbsl.sem.drs.Simple_DRS_Condition; +import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Aggregate; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_OrderBy; 
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.Template; + + +public class DRS2SPARQL_Converter { + + // suppresses console output + private boolean silent = true; + List<Slot> slots; + Template template; + List<Integer> usedInts; + + public DRS2SPARQL_Converter() { + template = new Template(new Query()); + usedInts = new ArrayList<Integer>(); + } + + public DRS2SPARQL_Converter(boolean silent) { + setSilent(silent); + template = new Template(new Query()); + usedInts = new ArrayList<Integer>(); + } + + public boolean isSilent() { + return silent; + } + + public void setSilent(boolean silent) { + this.silent = silent; + } + + public List<SPARQL_Property> getProperties(Complex_DRS_Condition cond) { + List<SPARQL_Property> retVal = new ArrayList<SPARQL_Property>(); + + return retVal; + } + + public Template convert(DRS drs,List<Slot> ls) { + + Set<SPARQL_Prefix> prefixes = new HashSet<SPARQL_Prefix>(); + prefixes.add(new SPARQL_Prefix("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")); + prefixes.add(new SPARQL_Prefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#")); + + if (!isSilent()) { + System.out.print("Converting DRS{" + drs.toString() + "}..."); + } + + template = new Template(new Query()); + slots = ls; + + Query q = convert(drs, new Query(), false); + q.setPrefixes(prefixes); + + template.setQuery(q); + + if (!isSilent()) { + System.out.println("... 
done"); + } + + return template; + } + + private Query convert(DRS drs, Query query, boolean negate) { + + redundantEqualRenaming(drs); + + for (DiscourseReferent referent : drs.getDRs()) { + if (referent.isMarked()) { + SPARQL_Term term = new SPARQL_Term(referent.toString().replace("?","")); + term.setIsVariable(true); + query.addSelTerm(term); + } + if (referent.isNonexistential()) { + SPARQL_Term term = new SPARQL_Term(referent.getValue()); + term.setIsVariable(true); + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(term); + query.addFilter(f); + } + for (Slot s : slots) { + if (s.getAnchor().equals(referent.toString())) { + template.addSlot(s); + break; + } + } + } + + Set<SPARQL_Triple> statements = new HashSet<SPARQL_Triple>(); + + for (DRS_Condition condition : drs.getConditions()) { + Set<SPARQL_Triple> scondition = convertCondition(condition, query).getConditions(); + statements.addAll(scondition); + if (negate) { + for (int i = 0; i < scondition.size(); ++i) { + SPARQL_Term term = ((SPARQL_Triple) scondition.toArray()[i]).getVariable(); + if (query.isSelTerm(term)) { + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(term); + query.addFilter(f); + } + } + } + } + + if (query.getSelTerms().size() == 0) + query.setQt(SPARQL_QueryType.ASK); + + query.setConditions(statements); + + return query; + } + + private Query convertCondition(DRS_Condition condition, Query query) { + if (condition.isComplexCondition()) { + if (!isSilent()) { + System.out.print("|complex:" + condition.toString()); + } + Complex_DRS_Condition complex = (Complex_DRS_Condition) condition; + + DRS restrictor = complex.getRestrictor(); + DRS_Quantifier quant = complex.getQuantifier(); + DRS scope = complex.getScope(); + + // call recursively + for (DRS_Condition cond : restrictor.getConditions()) { + query = convertCondition(cond, query); + } + for (DRS_Condition cond : scope.getConditions()) { + query = convertCondition(cond, query); + } + // add the quantifier at last + 
DiscourseReferent ref = complex.getReferent(); + String sref = ref.getValue(); + if (!isSilent()) { + System.out.print("|quantor:" + quant); + } + switch (quant) { + case HOW_MANY: + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT)); + break; + case EVERY: + // probably save to ignore // TODO unless in cases like "which actor starred in every movie by spielberg?" + // query.addFilter(new SPARQL_Filter(new SPARQL_Term(sref))); + break; + case NO: + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(new SPARQL_Term(sref)); + query.addFilter(f); + break; + case FEW: // + break; + case MANY: // + break; + case MOST: // + break; + case SOME: // + break; + case THE_LEAST: + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT)); + query.addOrderBy(new SPARQL_Term(sref, SPARQL_OrderBy.ASC)); + query.setLimit(1); + break; + case THE_MOST: + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT)); + query.addOrderBy(new SPARQL_Term(sref, SPARQL_OrderBy.DESC)); + query.setLimit(1); + break; + } + } else if (condition.isNegatedCondition()) { + if (!isSilent()) { + System.out.print("|negation:" + condition.toString()); + } + Negated_DRS neg = (Negated_DRS) condition; + query = convert(neg.getDRS(), query, true); + + } else { + Simple_DRS_Condition simple = (Simple_DRS_Condition) condition; + + if (!isSilent()) { + System.out.print(isSilent() + "|simple:" + condition.toString()); + } + + int arity = simple.getArguments().size(); + String predicate = simple.getPredicate(); + if (predicate.startsWith("SLOT")) { + for (Slot s : slots) { + if (s.getAnchor().equals(predicate)) { + predicate = "p" + createFresh(); + s.setAnchor(predicate); + template.addSlot(s); + break; + } + } + } + SPARQL_Property prop = new SPARQL_Property(predicate); + prop.setIsVariable(true); + + if (predicate.equals("count")) { + // COUNT(?x) AS ?c + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, true, new 
SPARQL_Term(simple.getArguments().get(1).getValue(),true))); + return query; + } else if (predicate.equals("sum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.SUM)); + return query; + } else if (predicate.equals("greater")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue()), + SPARQL_PairType.GT))); + return query; + } else if (predicate.equals("greaterorequal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue()), + SPARQL_PairType.LT))); + return query; + } else if (predicate.equals("less")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue()), + SPARQL_PairType.LTEQ))); + return query; + } else if (predicate.equals("lessorequal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue()), + SPARQL_PairType.GT))); + return query; + } else if (predicate.equals("maximum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.MAX)); + return query; + } else if (predicate.equals("minimum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.MIN)); + return query; + } else if (predicate.equals("equal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue()), + SPARQL_PairType.EQ))); + return query; + } + + if (arity == 1) { + SPARQL_Term term = new 
SPARQL_Term(simple.getArguments().get(0).getValue(),true); + query.addCondition(new SPARQL_Triple(term,new SPARQL_Property("type",new SPARQL_Prefix("rdf","")),prop)); + } + else if (arity == 2) { + String arg1 = simple.getArguments().get(0).getValue(); + String arg2 = simple.getArguments().get(1).getValue(); + query.addCondition(new SPARQL_Triple(new SPARQL_Term(arg1,true),prop,new SPARQL_Term(arg2,true))); + } + else if (arity > 2) { + // TODO + } + } + return query; + } + + private void redundantEqualRenaming(DRS drs) { + + Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); + for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { + if(c.getPredicate().equals("equal")) { + equalsConditions.add(c); + } + } + + DiscourseReferent firstArg; + DiscourseReferent secondArg; + boolean firstIsURI; + boolean secondIsURI; + + for (Simple_DRS_Condition c : equalsConditions) { + + firstArg = c.getArguments().get(0); + secondArg = c.getArguments().get(1); + firstIsURI = isUri(firstArg.getValue()); + secondIsURI = isUri(secondArg.getValue()); + + boolean oneArgIsInt = firstArg.toString().matches("[0..9]") || secondArg.toString().matches("[0..9]"); + + drs.removeCondition(c); + if (firstIsURI) { + drs.replaceEqualRef(secondArg, firstArg, false); + for (Slot s : slots) { + if (s.getAnchor().equals(secondArg.getValue())) { + s.setAnchor(firstArg.getValue()); + } + } + } else if (secondIsURI) { + drs.replaceEqualRef(firstArg, secondArg, false); + for (Slot s : slots) { + if (s.getAnchor().equals(firstArg.getValue())) { + s.setAnchor(secondArg.getValue()); + } + } + } else if (!oneArgIsInt) { + drs.replaceEqualRef(firstArg, secondArg, false); + for (Slot s : slots) { + if (s.getAnchor().equals(firstArg.getValue())) { + s.setAnchor(secondArg.getValue()); + } + } + } + } + + } + + private boolean isUri(String arg) { + return false; // TODO + } + + private int createFresh() { + + int fresh = 0; + for (int i = 0; usedInts.contains(i); i++) { + fresh 
= i+1 ; + } + usedInts.add(fresh); + return fresh; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DUDE2UDRS_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DUDE2UDRS_Converter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DUDE2UDRS_Converter.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,77 @@ +package org.dllearner.algorithm.tbsl.converter; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.DRS; +import org.dllearner.algorithm.tbsl.sem.drs.UDRS; +import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; +import org.dllearner.algorithm.tbsl.sem.util.DomType; +import org.dllearner.algorithm.tbsl.sem.util.DominanceConstraint; +import org.dllearner.algorithm.tbsl.sem.util.Label; + +public class DUDE2UDRS_Converter { + + public DUDE2UDRS_Converter() { + } + + public UDRS convert(Dude dude) throws UnsupportedOperationException { + + UDRS udrs = new UDRS(); + + // determining bottom and top + + Set<Label> bottoms = new HashSet<Label>(); + Set<Label> tops = new HashSet<Label>(); + + for (DominanceConstraint constraint : dude.getDominanceConstraints()) { + if (!constraint.getType().equals(DomType.equal)) { + tops.add(constraint.getSuper()); + bottoms.add(constraint.getSub()); + } + } + for (DominanceConstraint constraint : dude.getDominanceConstraints()) { + if (!constraint.getType().equals(DomType.equal)) { + tops.remove(constraint.getSub()); + bottoms.remove(constraint.getSuper()); + } + } + + if (tops.isEmpty()) { // then all constraints were equals + tops.add(new Label("noTop")); + } + if (bottoms.isEmpty()) { // just to make sure... 
+ bottoms.add(new Label("noBottom")); + } + + // precondition: tops and bottoms are singleton sets + Label bottomLabel = (new ArrayList<Label>(bottoms)).get(0); + Label topLabel = (new ArrayList<Label>(tops)).get(0); + + udrs.setBottom(bottomLabel); + udrs.setTop(topLabel); + + // copying components and dominance constraints + + for ( DRS component : dude.getComponents() ) + { + udrs.addComponent(component.clone()); + } + + if (!topLabel.toString().equals("noTop")) { + udrs.addComponent(dude.getComponent(topLabel).clone()); + } + if (!bottomLabel.toString().equals("noBottom")) { + udrs.addComponent(dude.getComponent(bottomLabel).clone()); + } + + for ( DominanceConstraint constraint : dude.getDominanceConstraints() ) + { + udrs.addDominanceConstraint(constraint); + } + + return udrs; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,230 @@ +package org.dllearner.algorithm.tbsl.learning; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.autosparql.server.search.SolrSearch; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import 
org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.sparqlquerygenerator.util.ModelGenerator; +import org.dllearner.sparqlquerygenerator.util.ModelGenerator.Strategy; + +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +public class SPARQLTemplateBasedLearner { + + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); + private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); + + private static final int TOP_K = 5; + private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-1.4.1"; + private static final int RECURSION_DEPTH = 2; + + private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + private ExtractionDBCache cache = new ExtractionDBCache("cache"); + + private SolrSearch resource_index; + private SolrSearch class_index; + private SolrSearch property_index; + private ModelGenerator modelGenenerator; + private Templator templateGenerator; + + private String question; + + + public SPARQLTemplateBasedLearner(){ + resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); + resource_index.setHitsPerPage(TOP_K); + class_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_classes"); + class_index.setHitsPerPage(TOP_K); + property_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_properties"); + property_index.setHitsPerPage(TOP_K); + + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + + templateGenerator = new Templator(); + } + + public void 
setEndpoint(SparqlEndpoint endpoint){ + this.endpoint = endpoint; + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + } + + public void learnSPARQLQueries(String question){ + this.question = question; + + //generate SPARQL query templates + logger.info("Generating SPARQL query templates..."); + mon.start(); + Set<Template> templates = templateGenerator.buildTemplates(question); + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Templates:"); + for(Template t : templates){ + logger.info(t); + } + + //generate candidate SPQRL queries + List<String> possibleSPARQLQueries = getPossibleSPARQLQueries(templates); + + //test candidates on remote endpoint + validateAgainstRemoteEndpoint(possibleSPARQLQueries); + + //test candidates on local model + validateAgainstLocalModel(possibleSPARQLQueries); + + } + + private Model getWorkingModel(List<String> resources){ + logger.info("Generating local model..."); + mon.start(); + Model workingModel = ModelFactory.createDefaultModel(); + Model model; + for(String resource : resources){ + model = modelGenenerator.createModel(resource, Strategy.CHUNKS, RECURSION_DEPTH); + workingModel.add(model); + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Local model contains " + workingModel.size() + " triples."); + return workingModel; + } + + private List<String> getPossibleSPARQLQueries(Set<Template> templates){ + logger.info("Generating candidate SPARQL queries..."); + mon.start(); + List<String> queries = new ArrayList<String>(); + Query query; + for(Template template : templates){ + query = template.getQuery(); + queries.add(query.toString()); + for(Slot slot : template.getSlots()){ + Set<String> tmp = new HashSet<String>(); + String var = 
slot.getAnchor(); + List<String> words = slot.getWords(); + for(String uri : getCandidateURIs(slot)){ + for(String q : queries){ + tmp.add(q.replace("?" + var, "<" + uri + ">")); + } + } + if(!words.isEmpty()){ + queries.clear(); + queries.addAll(tmp); + } + } + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + return queries; + } + + private Set<String> getCandidateURIs(Slot slot){ + logger.info("Generating candidate URIs for " + slot.getWords() + "..."); + mon.start(); + SolrSearch index = null; + Set<String> uris = new HashSet<String>(); + if(slot.getSlotType() == SlotType.CLASS){ + index = class_index; + } else if(slot.getSlotType() == SlotType.PROPERTY){ + index = property_index; + } else if(slot.getSlotType() == SlotType.RESOURCE){ + index = resource_index; + } + for(String word : slot.getWords()){ + uris.addAll(index.getResources("label:" + word)); + + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Candiate URIs: " + uris); + return uris; + } + + private void validateAgainstRemoteEndpoint(List<String> queries){ + logger.info("Testing candidate SPARQL queries on remote endpoint..."); + mon.start(); + for(String query : queries){ + logger.info("Testing query:\n" + query); + List<String> results = getResultFromRemoteEndpoint(query); + logger.info("Result: " + results); + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + } + + private void validateAgainstLocalModel(List<String> queries){ + List<String> resources = resource_index.getResources(question); + + Model model = getWorkingModel(resources); + + for(String query : queries){ + System.out.println("Testing query:\n" + query); + List<String> results = getResultFromLocalModel(query, model); + System.out.println("Result: " + results); + } + } + + private List<String> getResultFromRemoteEndpoint(String query){ + List<String> resources = new ArrayList<String>(); + ResultSet rs = 
SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 1")); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + resources.add(qs.get("y").toString()); + } + return resources; + } + + private List<String> getResultFromLocalModel(String query, Model model){ + List<String> resources = new ArrayList<String>(); + QueryExecution qe = QueryExecutionFactory.create(query, model); + ResultSet rs = qe.execSelect(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + resources.add(qs.get("y").toString()); + } + return resources; + } + + + /** + * @param args + * @throws MalformedURLException + */ + public static void main(String[] args) throws MalformedURLException { + SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://db0.aksw.org:8999/sparql"), + Collections.<String>singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + learner.setEndpoint(endpoint); + learner.learnSPARQLQueries("Give me all countries in Europe"); +// learner.learnSPARQLQueries("Give me all soccer clubs in Premier League"); + + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Case.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Case.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Case.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,5 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +public enum Case implements MorphologicalProperty { + NOM, GEN, DAT, ACC +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Feature.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Feature.java (rev 0) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Feature.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,186 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +import java.util.regex.Pattern; + +/** + * Object to store the morphological properties of a TreeNode (root node) or the + * morphological requirements of a SubstNode. + * + * @author felix + * + */ +public class Feature { + + private Case c; + private Numerus n; + private Gender g; + private Prep_de p; + + public Feature(Case c, Numerus n, Gender g, Prep_de p) { + this.c = c; + this.n = n; + this.g = g; + this.p = p; + } + + /** + * used by the ltagParser to construct the Feature object from a string. + * + * @param s + * e.g. "{c:nom,g:g}" + * @return + */ + public static Feature construct(String s) { + + Case c = null; + Numerus n = null; + Gender g = null; + Prep_de p = null; + + String[] parts = s.replaceAll("\\{|\\}", "").split(","); + for (String x : parts) { + if (Pattern.matches("(c:(nom|gen|dat|acc))", x)) { + if (x.substring(2).equals("nom")) { + c = Case.NOM; + } + if (x.substring(2).equals("gen")) { + c = Case.GEN; + } + if (x.substring(2).equals("dat")) { + c = Case.DAT; + } + if (x.substring(2).equals("acc")) { + c = Case.ACC; + } + } + if (Pattern.matches("(n:(sg|pl))", x)) { + if (x.substring(2).equals("sg")) { + n = Numerus.SG; + } + if (x.substring(2).equals("pl")) { + n = Numerus.PL; + } + } + if (Pattern.matches("(g:(m|f|n))", x)) { + if (x.substring(2).equals("m")) { + g = Gender.M; + } + if (x.substring(2).equals("f")) { + g = Gender.F; + } + if (x.substring(2).equals("n")) { + g = Gender.N; + } + } + if (Pattern.matches("(p:(an|durch))", x)) { + if (x.substring(2).equals("an")) { + p = Prep_de.AN; + } + if (x.substring(2).equals("durch")) { + p = Prep_de.DURCH; + } + } + } + if (c == null && g == null && n == null && p == null) { + return null; + } else { + return new Feature(c, n, g, p); + } + + } + + public String toString() { + String cStr 
= ""; + String nStr = ""; + String gStr = ""; + String pStr = ""; + if (c != null) { + cStr = "c:" + c.toString().toLowerCase() + " "; + } + if (n != null) { + nStr = "n:" + n.toString().toLowerCase() + " "; + } + if (g != null) { + gStr = "g:" + g.toString().toLowerCase(); + } + if (p != null) { + pStr = "p:" + p.toString().toLowerCase(); + } + + return ("{" + cStr + nStr + gStr + pStr + "}").trim().replaceAll(" ", ","); + } + + public Case getC() { + return c; + } + + public void setC(Case c) { + this.c = c; + } + + public Numerus getN() { + return n; + } + + public void setN(Numerus n) { + this.n = n; + } + + public Gender getG() { + return g; + } + + public void setG(Gender g) { + this.g = g; + } + + public Prep_de getP() { + return p; + } + + public void setP(Prep_de p) { + this.p = p; + } + + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((c == null) ? 0 : c.hashCode()); + result = prime * result + ((g == null) ? 0 : g.hashCode()); + result = prime * result + ((n == null) ? 0 : n.hashCode()); + result = prime * result + ((p == null) ? 
0 : p.hashCode()); + return result; + } + + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof Feature)) + return false; + Feature other = (Feature) obj; + if (c == null) { + if (other.c != null) + return false; + } else if (!c.equals(other.c)) + return false; + if (g == null) { + if (other.g != null) + return false; + } else if (!g.equals(other.g)) + return false; + if (n == null) { + if (other.n != null) + return false; + } else if (!n.equals(other.n)) + return false; + if (p == null) { + if (other.p != null) + return false; + } else if (!p.equals(other.p)) + return false; + return true; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Gender.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Gender.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Gender.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,5 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +public enum Gender implements MorphologicalProperty { + M,F,N +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/LexicalSelection.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/LexicalSelection.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/LexicalSelection.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,5 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +public interface LexicalSelection { + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/MorphologicalProperty.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/MorphologicalProperty.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/MorphologicalProperty.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,10 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +/** + * a morphological property can currently be Case, Numerus or Gender. + * @author felix + * + */ +public interface MorphologicalProperty { + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Numerus.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Numerus.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Numerus.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,5 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +public enum Numerus implements MorphologicalProperty { + SG,PL +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Prep_de.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Prep_de.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Prep_de.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,5 @@ +package org.dllearner.algorithm.tbsl.ltag.agreement; + +public enum Prep_de implements LexicalSelection { + AN, DURCH +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,76 @@ 
+package org.dllearner.algorithm.tbsl.ltag.agreement; + +/** + * implements the unification of two Feature objects. This is used by + * earleyParser.SubstPredictor() to determine if a tree can be substituted into + * a SubstNode if the SubstNode has Feature requirements. + * + * @author felix + * + */ +public class Unification { + + /** + * @param a + * FeatureConstraints from the SubstNode + * @param b + * Feature from the RootNode of the Tree candidate + */ + public static boolean isUnifiable(Feature a, Feature b) { + + if (a == null && b == null) { + return true; + } + else if (a == null && b != null) { + if (b.getP() == null) { return true;} + else { return false; } + } + else if (b == null && a != null) { + if (a.getP() == null) { return true; } + else { return false; } + } else { + if (a.equals(b)) { + return true; + } else { + if (unify(a.getC(), b.getC()) && unify(a.getN(), b.getN()) + && unify(a.getG(), b.getG()) + && unify(a.getP(), b.getP())) { + return true; + } + return false; + } + } + } + + private static boolean unify(MorphologicalProperty a, + MorphologicalProperty b) { + if (a == null || b == null) { + return true; + } else { + if (a.equals(b)) { + return true; + } else { + return false; + } + } + } + + private static boolean unify(LexicalSelection a, LexicalSelection b) { + if (a == null && b == null) { + return true; + } else if (a.equals(b)) { + return true; + } + return false; + } + + public static void main(String[] args) { + Feature a = Feature.construct("{c:nom,n:pl,p:durch}"); + Feature b = Feature.construct("{c:nom,p:durch}"); + + System.out.println(a); + System.out.println(b); + System.out.println(Unification.isUnifiable(a, b)); + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Category.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Category.java (rev 0) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Category.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,8 @@ +package org.dllearner.algorithm.tbsl.ltag.data; + +/** defines the categories a node in an LTAG tree can have. **/ +public enum Category { + + S, WH, NP, DP, VP, V, DET, PP, N, P, ADV, PART, ADJ, ADJCOMP, PUNCT, CC, EX, NUM, C, NEG + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,195 @@ +package org.dllearner.algorithm.tbsl.ltag.data; + +import java.util.ArrayList; +import java.util.List; + +import org.dllearner.algorithm.tbsl.ltag.agreement.Feature; + + +/** + * A FootNode represents a TreeNode in an auxiliary tree which has the same + * category as the root node. No adjunction is allowed at a FootNode. 
+ **/ + +public class FootNode implements TreeNode { + + Category category; + Tree parent; + boolean NA = false; + + public FootNode(Category cat) { + category = cat; + parent = null; + } + + public FootNode clone() { + FootNode out = new FootNode(category); + out.NA = NA; + return out; + } + + public TreeNode adjoin(String label, TreeNode tree) + throws UnsupportedOperationException { + + if (tree.isAuxTree()) { + + FootNode output = new FootNode(category); + + output.setChildren(new ArrayList<TreeNode>()); + + return output; + } else { + throw new UnsupportedOperationException( + "adjoin failed at foot node because the following argument is not an auxiliary tree:\n" + + tree.toString()); + } + } + + public FootNode substitute(String index, TreeNode tree) { + return this; + } + + public Tree replaceFoot(List<TreeNode> trees) { + Tree output = new Tree(); + + output.setCategory(category); + output.setChildren(trees); + + return output; + } + + public boolean isAuxTree() { + + List<FootNode> footNodes = getFootNodes(); + + if (footNodes.size() == 1) { + FootNode footNode = footNodes.get(0); + return (category.equals(footNode.category)); + } else { + return false; + } + } + + public List<FootNode> getFootNodes() { + + List<FootNode> output = new ArrayList<FootNode>(); + + output.add(this); + + return output; + + } + + public List<TerminalNode> getTerminalNodes() { + List<TerminalNode> output = new ArrayList<TerminalNode>(); + return output; + } + + public Category getCategory() { + return category; + } + + public void setCategory(Category cat) { + category = cat; + } + + public List<TreeNode> getChildren() { + ArrayList<TreeNode> output = new ArrayList<TreeNode>(); + return output; + } + + public void setChildren(List<TreeNode> treelist) { + + } + + public Tree getParent() { + return parent; + } + + public void setParent(Tree tree) { + parent = tree; + } + + public void setParentForTree() { + } + + public String toString() { + return category + "*"; + } + + public 
String toFileString() { + return this.getCategory().toString()+"*"; + } + + public String toString(String indent) { + return indent + category + "*"; + } + + public TreeNode getRightSibling() { + int idx = this.parent.children.indexOf(this); + + // if this is rightmost children of parent, + if (idx == parent.children.size() - 1) { + return null; + } + + else { + // return right sibling + return parent.children.get(idx + 1); + } + } + + public boolean getAdjConstraint() { + return NA; + } + + public void setAdjConstraint(boolean x) { + NA = x; + } + + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((category == null) ? 0 : category.hashCode()); + return result; + } + + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof FootNode)) + return false; + FootNode other = (FootNode) obj; + if (category == null) { + if (other.category != null) + return false; + } else if (!category.equals(other.category)) + return false; + return true; + } + + public String getAnchor() { + return ""; + } + + public Feature getFeature() { + return null; + } + + public void setFeature(Feature f) {} + + + public TreeNode isGovernedBy(Category cat) { + if (this.getParent() == null) { + return null; + } else if (this.getParent().getCategory().equals(cat)){ + return this.getParent(); + } else { + return this.getParent().isGovernedBy(cat); + } + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/LTAG_Tree_Constructor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/LTAG_Tree_Constructor.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/LTAG_Tree_Constructor.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,23 @@ +package org.dllearner.algorithm.tbsl.ltag.data; + +import java.io.StringReader; + 
+import org.dllearner.algorithm.tbsl.ltag.reader.LTAGTreeParser; +import org.dllearner.algorithm.tbsl.ltag.reader.ParseException; + + +public class LTAG_Tree_Constructor { + + public TreeNode construct(String string) throws ParseException + { + // new TreeNode interface in fracosem.ltag + TreeNode tree; + LTAGTreeParser parser = new LTAGTreeParser(new StringReader(new String(string))); + parser.ReInit(new StringReader(new String(string))); + tree = parser.Tree(); + + return tree; + } + + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java 2011-03-18 09:35:21 UTC (rev 2726) @@ -0,0 +1,201 @@ +package org.dllearner.algorithm.tbsl.ltag.data; + +import java.util.ArrayList; +import java.util.List; + +import org.dllearner.algorithm.tbsl.ltag.agreement.Feature; + + +/** + * A SubstNode represents a TreeNode within a tree where the substitution + * operation is applicable. 
It has a field index that stores the (string) id of + * this substitution node + **/ + +public class SubstNode implements TreeNode { + + Category category; + String index; + Tree parent; + Feature constraints; + + public SubstNode(String ind, Category cat, Feature f) { + + category = cat; + index = ind; + parent = null; + constraints = f; + + } + + public SubstNode adjoin(String label, TreeNode tree) { + return this; + } + + public TreeNode substitute(String ind, TreeNode tree) { + if (index.equals(ind)) { + return tree; + } else { + return this; + } + + } + + public SubstNode replaceFoot(List<TreeNode> trees) { + return this; + } + + public boolean isAuxTree() { + return false; + } + + public List<FootNode> getFootNodes() { + List<FootNode> output = new ArrayList<FootNode>(); + return output; + } + + public List<TerminalNode> getTerminalNodes() { + List<TerminalNode> output = new ArrayList<TerminalNode>(); + return output; + } + + public Category getCategory() { + return category; + } + + public void setCategory(Category cat) { + category = cat; + } + + public List<TreeNode> getChildren() { + ArrayList<TreeNode> output = new ArrayList<TreeNode>(); + return output; + } + + public SubstNode clone() { + return new SubstNode(index, category, constraints); + } + + public void setChildren(List<TreeNode> treelist) { + + } + + public Tree getParent() { + return parent; + } + + public void setParent(Tree tree) { + parent = tree; + + } + + public void setParentForTree() { + } + + public String toString() { + return category.toString() + "[" + index + "]"; + } + + public String toString(String indent) { + return indent + category.toString() + "[" + index + "]"; + } + + public String toFileString() { + String constStr = ""; + if (constraints!=null) { + constStr = constraints.toString(); + } + return this.getCategory().toString() + "[" + this.index + "]" + + constStr; + } + + public TreeNode getRightSibling() { + int idx = this.parent.children.indexOf(this); + + // if this is 
rightmost children of parent, + if (idx == parent.children.size() - 1) { + return null; + } + + else { + // return right sibling + return parent.children.get(idx + 1); + } + } + + public boolean getAdjConstraint() { + return false; + } + + public void setAdjConstraint(boolean x) { + } + + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((category == null) ? 0 : category.hashCode()); + result = prime * result + ((index == nu... [truncated message content] |
From: <lor...@us...> - 2011-03-20 16:29:03
|
Revision: 2728 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2728&view=rev Author: lorenz_b Date: 2011-03-20 16:28:56 +0000 (Sun, 20 Mar 2011) Log Message: ----------- Removed dependency to autosparql project. Added some logging. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/Search.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-03-20 16:27:26 UTC (rev 2727) +++ trunk/components-ext/pom.xml 2011-03-20 16:28:56 UTC (rev 2728) @@ -1,35 +1,90 @@ -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> - <groupId>org.dllearner</groupId> - <artifactId>components-ext</artifactId> - <packaging>jar</packaging> + <groupId>org.dllearner</groupId> + <artifactId>components-ext</artifactId> + <packaging>jar</packaging> - <name>components-ext</name> - <url>http://aksw.org/Projects/DLLearner</url> - - <parent> - <groupId>org.dllearner</groupId> - <artifactId>dllearner-parent</artifactId> - <version>1.0-SNAPSHOT</version> - </parent> - <dependencies> - <dependency> - <groupId>org.dllearner</groupId> - <artifactId>components-core</artifactId> - </dependency> - <dependency> - 
<groupId>org.dllearner</groupId> - <artifactId>autosparql</artifactId> - <version>1.0-SNAPSHOT</version> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>com.jamonapi</groupId> - <artifactId>jamon</artifactId> - <version>2.7</version> - <type>jar</type> - <scope>compile</scope> - </dependency> - </dependencies> + <name>components-ext</name> + <url>http://aksw.org/Projects/DLLearner</url> + + <repositories> + <repository> + <id>Simmetrics</id> + <url>http://maven.mse.jhu.edu/m2repository/</url> + </repository> + </repositories> + + <parent> + <groupId>org.dllearner</groupId> + <artifactId>dllearner-parent</artifactId> + <version>1.0-SNAPSHOT</version> + </parent> + <dependencies> + <dependency> + <groupId>org.dllearner</groupId> + <artifactId>components-core</artifactId> + </dependency> + <dependency> + <groupId>com.jamonapi</groupId> + <artifactId>jamon</artifactId> + <version>2.7</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.solr</groupId> + <artifactId>solr-core</artifactId> + <version>1.4.1</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>edu.stanford</groupId> + <artifactId>postagger</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>library</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>core</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>ner</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>jaws</groupId> + <artifactId>core</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>uk.ac.shef.wit</groupId> + <artifactId>simmetrics</artifactId> + <version>1.6.2</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>stax</groupId> + <artifactId>stax-api</artifactId> + 
<version>1.0.1</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>woodstox</groupId> + <artifactId>wstx-api</artifactId> + <version>3.2.0</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + </dependencies> </project> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-03-20 16:27:26 UTC (rev 2727) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-03-20 16:28:56 UTC (rev 2728) @@ -9,17 +9,17 @@ import java.util.Set; import org.apache.log4j.Logger; +import org.dllearner.algorithm.qtl.util.ModelGenerator; +import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; +import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.autosparql.server.search.SolrSearch; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.sparqlquerygenerator.util.ModelGenerator; -import org.dllearner.sparqlquerygenerator.util.ModelGenerator.Strategy; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; @@ -222,8 +222,8 @@ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://db0.aksw.org:8999/sparql"), Collections.<String>singletonList("http://dbpedia.org"), Collections.<String>emptyList()); learner.setEndpoint(endpoint); - learner.learnSPARQLQueries("Give me all countries in Europe"); -// 
learner.learnSPARQLQueries("Give me all soccer clubs in Premier League"); +// learner.learnSPARQLQueries("Give me all countries in Europe"); + learner.learnSPARQLQueries("Give me all soccer clubs in Premier League"); } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/Search.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/Search.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/Search.java 2011-03-20 16:28:56 UTC (rev 2728) @@ -0,0 +1,13 @@ +package org.dllearner.algorithm.tbsl.search; + +import java.util.List; + +public interface Search { + List<String> getResources(String queryString); + List<String> getResources(String queryString, int offset); + + int getTotalHits(String queryString); + void setHitsPerPage(int hitsPerPage); + + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-03-20 16:28:56 UTC (rev 2728) @@ -0,0 +1,103 @@ +package org.dllearner.algorithm.tbsl.search; + +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.BinaryRequestWriter; +import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.ModifiableSolrParams; + +public class SolrSearch implements 
Search{ + + private CommonsHttpSolrServer server; + + private int hitsPerPage = 10; + private int lastTotalHits = 0; + + public SolrSearch(String solrServerURL){ + try { + server = new CommonsHttpSolrServer(solrServerURL); + server.setRequestWriter(new BinaryRequestWriter()); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + + @Override + public List<String> getResources(String queryString) { + return getResources(queryString, 0); + } + + @Override + public List<String> getResources(String queryString, int offset) { + List<String> resources = new ArrayList<String>(); + QueryResponse response; + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("q", queryString); + params.set("rows", hitsPerPage); + params.set("start", offset); + response = server.query(params); + SolrDocumentList docList = response.getResults(); + lastTotalHits = (int) docList.getNumFound(); + for(SolrDocument d : docList){ + resources.add((String) d.get("uri")); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return resources; + } + + public Map<String, Float> getResourcesWithScores(String queryString) { + return getResourcesWithScores(queryString, hitsPerPage); + } + + public Map<String, Float> getResourcesWithScores(String queryString, int limit) { + return getResourcesWithScores(queryString, limit, 0); + } + + public Map<String, Float> getResourcesWithScores(String queryString, int limit, int offset) { + Map<String, Float> resource2ScoreMap = new HashMap<String, Float>(); + + QueryResponse response; + try { + SolrQuery query = new SolrQuery(); + query.setQuery(queryString); + query.setRows(hitsPerPage); + query.setStart(offset); + query.addField("score"); + query.addSortField("score", SolrQuery.ORDER.desc); + query.addSortField( "pagerank", SolrQuery.ORDER.desc ); + + response = server.query(query); + SolrDocumentList docList = response.getResults(); + lastTotalHits = (int) docList.getNumFound(); + for(SolrDocument d : 
docList){ + resource2ScoreMap.put((String) d.get("uri"), (Float) d.get("score")); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return resource2ScoreMap; + } + + @Override + public int getTotalHits(String queryString) { + return lastTotalHits; + } + + @Override + public void setHitsPerPage(int hitsPerPage) { + this.hitsPerPage = hitsPerPage; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-04-05 08:29:54
|
Revision: 2750 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2750&view=rev Author: lorenz_b Date: 2011-04-05 08:29:44 +0000 (Tue, 05 Apr 2011) Log Message: ----------- Migrated test cases from sparql-query-generator module. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java Added Paths: ----------- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/NBRTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/TreeSubsumptionTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-04-04 18:17:00 UTC (rev 2749) +++ trunk/components-ext/pom.xml 2011-04-05 08:29:44 UTC (rev 2750) @@ -29,9 +29,6 @@ <dependency> <groupId>com.jamonapi</groupId> <artifactId>jamon</artifactId> - <version>2.7</version> - <type>jar</type> - <scope>compile</scope> </dependency> <dependency> <groupId>org.apache.solr</groupId> @@ -86,5 +83,18 @@ <type>jar</type> <scope>compile</scope> </dependency> + <dependency> + <groupId>com.hp.hpl.jena</groupId> + <artifactId>jena</artifactId> + </dependency> + <dependency> + <groupId>com.hp.hpl.jena</groupId> + <artifactId>arq</artifactId> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> </dependencies> </project> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2011-04-04 18:17:00 UTC (rev 2749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -77,9 +77,11 @@ negExampleTrees.addAll(getQueryTrees(negExamples)); lgg = lggGenerator.getLGG(posExampleTrees); + System.out.println(lgg.getStringRepresentation()); if(queryTreeFilter != null){ lgg = queryTreeFilter.getFilteredQueryTree(lgg); } + System.out.println(lgg.getStringRepresentation()); if(coversNegativeQueryTree(lgg)){ throw new QTLException("Could not learn SPARQL query. Reason: LGG covers negative tree."); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-04-04 18:17:00 UTC (rev 2749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -23,6 +23,7 @@ import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.core.LearningAlgorithm; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; @@ -36,7 +37,7 @@ import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; -public class SPARQLTemplateBasedLearner { +public class SPARQLTemplateBasedLearner implements LearningAlgorithm{ private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); @@ -84,9 +85,11 @@ modelGenenerator = new ModelGenerator(endpoint, predicateFilters); } - public 
void learnSPARQLQueries(String question){ + public void setQuestion(String question){ this.question = question; - + } + + private void learnSPARQLQueries(){ //generate SPARQL query templates logger.info("Generating SPARQL query templates..."); mon.start(); @@ -365,14 +368,20 @@ * @throws MalformedURLException */ public static void main(String[] args) throws MalformedURLException { + String question = "Give me all soccer clubs in Premier League";//Give me all countries in Europe SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://db0.aksw.org:8999/sparql"), Collections.<String>singletonList("http://dbpedia.org"), Collections.<String>emptyList()); learner.setEndpoint(endpoint); -// learner.learnSPARQLQueries("Give me all countries in Europe"); - learner.learnSPARQLQueries("Give me all soccer clubs in Premier League"); + learner.setQuestion(question); + learner.start(); } + + @Override + public void start() { + learnSPARQLQueries(); + } } Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -0,0 +1,83 @@ +package org.dllearner.algorithm.qtl; + +import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.algorithm.qtl.operations.Generalisation; +import org.junit.Test; + +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +public class GeneralisationTest { + + 
private static final int RECURSION_DEPTH = 2; + private int maxModelSizePerExample = 3000; + private final static int LIMIT = 1000; + private final static int OFFSET = 1000; + private static final String ENDPOINT_URL = "http://dbpedia.org/sparql"; + + @Test + public void generalisationTest1(){ + String resource = "http://dbpedia.org/resource/Leipzig"; + + Generalisation<String> gen = new Generalisation<String>(); + Model model = getModelForExample(resource, maxModelSizePerExample); + QueryTree<String> tree = new QueryTreeFactoryImpl().getQueryTree(resource, model); + System.out.println(tree.toSPARQLQueryString()); + QueryTree<String> genTree = gen.generalise(tree); + String query = genTree.toSPARQLQueryString(); + System.out.println(query); + } + + private Model getModelForExample(String example, int maxSize){ + Query query = makeConstructQuery(example, LIMIT, 0); + QueryExecution qexec = QueryExecutionFactory.sparqlService(ENDPOINT_URL, query); + Model all = ModelFactory.createDefaultModel(); + Model model = qexec.execConstruct(); + all.add(model); + qexec.close(); + int i = 1; + while(model.size() != 0 && all.size() < maxSize){ + query = makeConstructQuery(example, LIMIT, i * OFFSET); + qexec = QueryExecutionFactory.sparqlService(ENDPOINT_URL, query); + model = qexec.execConstruct(); + all.add(model); + qexec.close(); + i++; + } + return all; + } + + private Query makeConstructQuery(String example, int limit, int offset){ + StringBuilder sb = new StringBuilder(); + sb.append("CONSTRUCT {\n"); + sb.append("<").append(example).append("> ").append("?p0 ").append("?o0").append(".\n"); + for(int i = 1; i < RECURSION_DEPTH; i++){ + sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + } + sb.append("}\n"); + sb.append("WHERE {\n"); + sb.append("<").append(example).append("> ").append("?p0 ").append("?o0").append(".\n"); + for(int i = 1; i < RECURSION_DEPTH; i++){ + sb.append("?o").append(i-1).append(" 
").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + } + + sb.append("FILTER (!regex (?p0, \"http://dbpedia.org/property/wikiPage\") && !regex(?p1, \"http://dbpedia.org/property/wikiPage\"))"); + sb.append("}\n"); + sb.append("ORDER BY "); + for(int i = 0; i < RECURSION_DEPTH; i++){ + sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); + } + sb.append("\n"); + sb.append("LIMIT ").append(limit).append("\n"); + sb.append("OFFSET ").append(offset); + Query query = QueryFactory.create(sb.toString()); + System.out.println(sb.toString()); + return query; + } + +} Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -0,0 +1,232 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package org.dllearner.algorithm.qtl; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.log4j.Logger; +import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import org.dllearner.algorithm.qtl.examples.DBpediaExample; +import org.dllearner.algorithm.qtl.examples.LinkedGeoDataExample; +import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.algorithm.qtl.operations.lgg.LGGGenerator; +import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; +import org.dllearner.algorithm.qtl.util.ModelGenerator; +import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.jamonapi.MonitorFactory; + +/** + * + * @author Lorenz Bühmann + * + */ +public class LGGTest { + + private static final Logger logger = Logger.getLogger(LGGTest.class); + + @Test + public void testLGGWithDBpediaExample(){ + QueryTreeFactory<String> factory = new QueryTreeFactoryImpl(); + + List<QueryTree<String>> posExampleTrees = DBpediaExample.getPosExampleTrees(); + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ + System.out.println("TREE " + cnt); + tree.dump(); + System.out.println("-----------------------------"); + cnt++; + } + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + QueryTree<String> lgg = 
lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + lgg.dump(); + + QueryTreeImpl<String> tree = factory.getQueryTree("?"); + QueryTreeImpl<String> subTree1 = new QueryTreeImpl<String>("?"); + subTree1.addChild(new QueryTreeImpl<String>("?"), "leaderParty"); + subTree1.addChild(new QueryTreeImpl<String>("?"), "population"); + subTree1.addChild(new QueryTreeImpl<String>("Germany"), "locatedIn"); + tree.addChild(subTree1, "birthPlace"); + tree.addChild(new QueryTreeImpl<String>("?"), RDFS.label.toString()); + QueryTreeImpl<String> subTree2 = new QueryTreeImpl<String>("Person"); + subTree2.addChild(new QueryTreeImpl<String>(OWL.Thing.toString()), RDFS.subClassOf.toString()); + tree.addChild(subTree2, RDF.type.toString()); + QueryTreeImpl<String> subTree3 = new QueryTreeImpl<String>("?"); + QueryTreeImpl<String> subSubTree = new QueryTreeImpl<String>("Person"); + subSubTree.addChild(new QueryTreeImpl<String>(OWL.Thing.toString()), RDFS.subClassOf.toString()); + subTree3.addChild(subSubTree, RDFS.subClassOf.toString()); + tree.addChild(subTree3, RDF.type.toString()); + + Assert.assertTrue(lgg.isSameTreeAs(tree)); + + System.out.println(tree.toSPARQLQueryString()); + + } + + @Test + public void testLGGWithLinkedGeoDataExample(){ + QueryTreeFactory<String> factory = new QueryTreeFactoryImpl(); + + List<QueryTree<String>> posExampleTrees = LinkedGeoDataExample.getPosExampleTrees(); + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ + System.out.println("TREE " + cnt); + tree.dump(); + System.out.println("-----------------------------"); + cnt++; + } + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + lgg.dump(); + + QueryTreeImpl<String> tree = factory.getQueryTree("?"); + QueryTreeImpl<String> subTree = new QueryTreeImpl<String>("lgdo:Aerodome"); + subTree.addChild(new QueryTreeImpl<String>("lgdo:Aeroway"), 
RDFS.subClassOf.toString()); + tree.addChild(subTree, RDF.type.toString()); + tree.addChild(new QueryTreeImpl<String>("?"), RDFS.label.toString()); + tree.addChild(new QueryTreeImpl<String>("?"), "geo:long"); + tree.addChild(new QueryTreeImpl<String>("?"), "geo:lat"); + tree.addChild(new QueryTreeImpl<String>("?"), "georss:point"); + tree.addChild(new QueryTreeImpl<String>("?"), "lgdp:icao"); + + Assert.assertTrue(lgg.isSameTreeAs(tree)); + + System.out.println(tree.toSPARQLQueryString()); + + } + +// @Test + public void performanceTest(){ + int recursionDepth = 3; + int limit = 100; + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + ExtractionDBCache cache = new ExtractionDBCache("cache"); + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + ModelGenerator modelGen = new ModelGenerator(endpoint, predicateFilters, cache); + QueryTreeFactory<String> treeFactory = new QueryTreeFactoryImpl(); + + String queryString = "SELECT ?resource WHERE {?resource a ?class." 
+ + " FILTER(REGEX(?resource,'http://dbpedia.org/resource'))} LIMIT " + limit; + SparqlQuery query = new SparqlQuery(queryString, endpoint); + ResultSet rs = query.send(); + + + + //load the models + SortedMap<String, Model> resource2Model = new TreeMap<String, Model>(); + Model model; + String resource; + logger.info("Resources(#triple):"); + while(rs.hasNext()){ + try { + resource = rs.next().get("resource").asResource().getURI(); + model = modelGen.createModel(resource, Strategy.CHUNKS, recursionDepth); + logger.info(resource + "(" + model.size() + ")"); + resource2Model.put(resource, model); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } +// model = modelGen.createModel("http://dbpedia.org/resource/A_Farewell_to_Arms", Strategy.CHUNKS, recursionDepth); +// logger.info("http://dbpedia.org/resource/A_Farewell_to_Arms" + "(" + model.size() + ")"); +// resource2Model.put("http://dbpedia.org/resource/A_Farewell_to_Arms", model); +// model = modelGen.createModel("http://dbpedia.org/resource/Darjeeling", Strategy.CHUNKS, recursionDepth); +// logger.info("http://dbpedia.org/resource/Darjeeling" + "(" + model.size() + ")"); +// resource2Model.put("http://dbpedia.org/resource/Darjeeling", model); + + + //create the querytrees + SortedMap<String, QueryTree<String>> resource2Tree = new TreeMap<String, QueryTree<String>>(); + Map<QueryTree<String>, String> tree2Resource = new HashMap<QueryTree<String>, String>(limit); + List<QueryTree<String>> trees = new ArrayList<QueryTree<String>>(); + QueryTree<String> tree; + for(Entry<String, Model> entry : resource2Model.entrySet()){ + try { + tree = treeFactory.getQueryTree(entry.getKey(), entry.getValue()); + trees.add(tree); + resource2Tree.put(entry.getKey(), tree); + tree2Resource.put(tree, entry.getKey()); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + + LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>(); + 
QueryTree<String> tree1; + QueryTree<String> tree2; + QueryTree<String> lgg; + for(int i = 0; i < trees.size(); i++){ + for(int j = i+1; j < trees.size(); j++){ + try { + tree1 = trees.get(i); + tree2 = trees.get(j); + lgg = lggGen.getLGG(tree1, tree2); + logger.info("LGG(" + tree2Resource.get(tree1) + ", " + tree2Resource.get(tree2) + ") needed " + + MonitorFactory.getTimeMonitor("LGG").getLastValue() + "ms"); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } +// logger.info("Tree 1:\n" + tree1.getStringRepresentation()); +// logger.info("Tree 2:\n" + tree2.getStringRepresentation()); +// logger.info("LGG:\n" + lgg.getStringRepresentation()); + } + } + logger.info("Average time to compute LGG: " + MonitorFactory.getTimeMonitor("LGG").getAvg()); + logger.info("Min time to compute LGG: " + MonitorFactory.getTimeMonitor("LGG").getMin()); + logger.info("Max time to compute LGG: " + MonitorFactory.getTimeMonitor("LGG").getMax()); + logger.info("#computed LGGs: " + MonitorFactory.getTimeMonitor("LGG").getHits()); + + + } + +} Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -0,0 +1,306 @@ +package org.dllearner.algorithm.qtl; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.log4j.ConsoleAppender; 
+import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.algorithm.qtl.util.ModelGenerator; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.JamonMonitorLogger; +import org.junit.Test; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +public class ModelCreationTest { + + private static final int RECURSION_DEPTH = 2; + private static final String RESOURCE = "http://dbpedia.org/resource/Dresden"; + + private static final Logger logger = Logger.getLogger(ModelCreationTest.class); + + private static final SparqlEndpoint ENDPOINT = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + + @Test + public void test1(){ + try { + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + FileAppender fileAppender = new FileAppender(layout, + "log/model_test.log", false); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.addAppender(fileAppender); + logger.setLevel(Level.DEBUG); + Logger.getLogger(ModelGenerator.class).setLevel(Level.DEBUG); + Logger.getLogger(ModelCreationTest.class).setLevel(Level.DEBUG); + + + URL url = new URL("http://lod.openlinksw.com/sparql/"); + SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + + ModelGenerator modelGen 
= new ModelGenerator(endpoint, predicateFilters, new ExtractionDBCache("construct-cache")); + +// logger.debug("Using chunk strategy."); +// Model model1 = modelGen.createModel(RESOURCE, ModelGenerator.Strategy.CHUNKS, 2); +// logger.debug("Got overall " + model1.size() + " triple."); +// +// logger.debug("Using incremental strategy."); +// Model model2 = modelGen.createModel(RESOURCE, ModelGenerator.Strategy.INCREMENTALLY, RECURSION_DEPTH); +// logger.debug("Got overall " + model2.size() + " triple."); + + logger.debug("Using chunk with optional strategy."); + Model model3 = modelGen.createModel(RESOURCE, ModelGenerator.Strategy.CHUNKS, 2);System.out.println(model3.size()); + logger.debug("Got overall " + model3.size() + " triple."); + QueryTreeFactory<String> f = new QueryTreeFactoryImpl(); + QueryTree<String> t = f.getQueryTree(RESOURCE, model3); + System.out.println(t.getStringRepresentation()); + +// Model diff = ModelFactory.createDefaultModel(); +// if(model1.size() > model2.size()){ +// logger.debug("Chunk strategy returned " + (model1.size() - model2.size()) + " more triple."); +// diff.add(model1.difference(model2)); +// } else if(model2.size() > model1.size()){ +// logger.debug("Incremental strategy returned " + (model2.size() - model1.size()) + " more triple."); +// diff.add(model2.difference(model1)); +// } else { +// logger.debug("Both strategies returned the same number of triple."); +// } +// +// logger.debug("Difference : "); +// Statement st = null; +// for(Iterator<Statement> i = diff.listStatements();i.hasNext(); st = i.next()){ +// logger.debug(st); +// } +// +// diff = model3.difference(model1); +// st = null; +// System.out.println("Difference between other"); +// for(Iterator<Statement> i = diff.listStatements();i.hasNext(); st = i.next()){ +// System.out.println(st); +// } +// assertTrue(model1.size() == model2.size()); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException 
e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + @Test + public void multiThreadedModelCreationTest(){ + String resource = "http://dbpedia.org/resource/Munich"; + + Model model = ModelFactory.createDefaultModel(); + + int proCnt = Runtime.getRuntime().availableProcessors(); + logger.info("Number of processor: " + proCnt); + Future<Model>[] ret = new Future[proCnt]; + List<String> queries = queries = createSearchQueries("Hamburg", "Vienna", "Stuttgart", "Frankfurt", "Kiel");;//createQueries(resource, proCnt); + + ExecutorService es = Executors.newFixedThreadPool(proCnt); + for(int i = 0; i < 5; i++){ + ret[i] = es.submit(new ModelRetrievalTask(queries.get(i))); + } + + for (int i = 0; i < proCnt; i++) { + try { + model.add(ret[i].get()); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + es.shutdown(); + System.out.println(model.size()); + + Model singleThreadedModel = ModelFactory.createDefaultModel(); + Monitor mon = MonitorFactory.getTimeMonitor("singleThreaded").start(); + queries = createSearchQueries("Leipzig", "Berlin", "Dresden", "Munich", "Dortmund"); + for(String query : queries){ + singleThreadedModel.add(getModel(query)); + } + mon.stop(); + System.out.println("Single threaded: " + mon.getTotal()); + + } + + private class ModelRetrievalTask implements Callable<Model>{ + + private String query; + + public ModelRetrievalTask(String query){ + this.query = query; + } + + @Override + public Model call() throws Exception { + System.out.println(query); + Monitor mon = MonitorFactory.getTimeMonitor("query").start(); + JamonMonitorLogger.getTimeMonitor(ModelCreationTest.class, "time").start(); + QueryEngineHTTP queryExecution = new QueryEngineHTTP(ENDPOINT.getURL().toString(), query); + for (String dgu : ENDPOINT.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : ENDPOINT.getNamedGraphURIs()) { + 
queryExecution.addNamedGraph(ngu); + } + Model model = queryExecution.execConstruct(); + mon.stop(); + System.out.println(mon.getLastValue()); + return model; + } + + } + + private Model getModel(String query){ + QueryEngineHTTP queryExecution = new QueryEngineHTTP(ENDPOINT.getURL().toString(), query); + for (String dgu : ENDPOINT.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : ENDPOINT.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + Model model = queryExecution.execConstruct(); + + return model; + } + + private List<String> createQueries(String resource, int cnt){ + List<String> queries = new ArrayList<String>(cnt); + for(int i = 0; i < cnt; i++){ + queries.add(createConstructQuery(resource, 50, i * 50)); + } + + return queries; + } + + private String createConstructQuery(String resource, int limit, int offset){ + + StringBuilder sb = new StringBuilder(); + sb.append("CONSTRUCT {\n"); + sb.append("<").append(resource).append("> ").append("?p ").append("?o").append(".\n"); + sb.append("}\n"); + sb.append("WHERE {\n"); + sb.append("<").append(resource).append("> ").append("?p ").append("?o").append(".\n"); + + sb.append("}\n"); + sb.append(" LIMIT ").append(limit).append(" OFFSET ").append(offset); + + return sb.toString(); + } + + private List<String> createSearchQueries(String ... searchTerms){ + List<String> queries = new ArrayList<String>(); + for(String term : searchTerms){ + queries.add(createSearchQuery(term)); + } + return queries; + } + + private String createSearchQuery(String searchTerm){ + StringBuilder sb = new StringBuilder(); + sb.append("CONSTRUCT"); + sb.append("{?s ?p ?o.}"); + sb.append("WHERE"); + sb.append("{?s ?p ?o."); + sb.append("?s rdfs:label ?label."); + sb.append("?label bif:contains '").append(searchTerm).append("'.} LIMIT 1000"); + +// sb.append("SELECT ?s ?label WHERE {?s rdfs:label ?label. 
?label bif:contains '").append(searchTerm).append("'.} limit 500"); + return sb.toString(); + } + +// +// class ModelProducer implements Runnable{ +// +// protected BlockingQueue<Model> queue; +// private int offset; +// +// public ModelProducer(BlockingQueue<Model> queue, int offset) { +// this.queue = queue; +// this.offset = offset; +// } +// +// @Override +// public void run() { +// Model model = getModel(offset);System.out.println(offset + " -> " + model.size()); +// try { +// queue.put(model); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } +// } +// +// private Model getModel(int offset){ +// Query query = QueryFactory.create( +// "CONSTRUCT " + +// "{<http://dbpedia.org/resource/Dresden> ?p1 ?o1. ?o1 ?p2 ?o2.}" + +// " WHERE " + +// "{<http://dbpedia.org/resource/Dresden> ?p1 ?o1. ?o1 ?p2 ?o2.}" + +// "LIMIT 100 OFFSET " + offset); +// +// QueryExecution qexec = QueryExecutionFactory.sparqlService( +// ENDPOINT.getURL().toString(), +// query, +// ENDPOINT.getDefaultGraphURIs(), +// ENDPOINT.getNamedGraphURIs()); +// return qexec.execConstruct(); +// } +// +// } +// +// class ModelConsumer implements Runnable{ +// +// protected BlockingQueue<Model> queue; +// private Model completeModel; +// +// public ModelConsumer(BlockingQueue<Model> queue) { +// this.queue = queue; +// completeModel = ModelFactory.createDefaultModel(); +// } +// +// @Override +// public void run() { +// while(true){ +// try { +// Model m = queue.take(); +// completeModel.add(m); +// System.out.println(completeModel.size()); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } +// } +// } +// +// } + +} Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/NBRTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/NBRTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/NBRTest.java 2011-04-05 08:29:44 UTC (rev 2750) 
@@ -0,0 +1,196 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.algorithm.qtl; + +import java.util.List; + +import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.examples.DBpediaExample; +import org.dllearner.algorithm.qtl.operations.lgg.LGGGenerator; +import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; +import org.dllearner.algorithm.qtl.operations.nbr.NBRGenerator; +import org.dllearner.algorithm.qtl.operations.nbr.NBRGeneratorImpl; +import org.dllearner.algorithm.qtl.operations.nbr.strategy.BruteForceNBRStrategy; +import org.dllearner.algorithm.qtl.operations.nbr.strategy.GreedyNBRStrategy; +import org.dllearner.algorithm.qtl.operations.nbr.strategy.TagNonSubsumingPartsNBRStrategy; +import org.junit.Test; + +/** + * + * @author Lorenz Bühmann + * + */ +public class NBRTest { + + @Test + public void computeSingleNBRBruteForce(){ + + List<QueryTree<String>> posExampleTrees = DBpediaExample.getPosExampleTrees(); + List<QueryTree<String>> negExampleTrees = DBpediaExample.getNegExampleTrees(); + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + NBRGenerator<String> nbrGenerator = new NBRGeneratorImpl<String>(new BruteForceNBRStrategy<String>()); + + int cnt = 1; + 
for(QueryTree<String> tree : posExampleTrees){ + System.out.println("POSITIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + System.out.println(lgg.getStringRepresentation()); + + System.out.println("-----------------------------------------------"); + + cnt = 1; + for(QueryTree<String> tree : negExampleTrees){ + System.out.println("NEGATIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> nbr = nbrGenerator.getNBR(lgg, negExampleTrees); + + System.out.println("NBR"); + System.out.println(nbr.getStringRepresentation()); + + } + + @Test + public void computeAllNBRsBruteForce(){ + List<QueryTree<String>> posExampleTrees = DBpediaExample.getPosExampleTrees(); + List<QueryTree<String>> negExampleTrees = DBpediaExample.getNegExampleTrees(); + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + NBRGenerator<String> nbrGenerator = new NBRGeneratorImpl<String>(new BruteForceNBRStrategy<String>()); + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ + System.out.println("POSITIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + System.out.println(lgg.getStringRepresentation()); + + System.out.println("-----------------------------------------------"); + + cnt = 1; + for(QueryTree<String> tree : negExampleTrees){ + System.out.println("NEGATIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + 
System.out.println("-----------------------------------------------"); + cnt++; + } + + List<QueryTree<String>> nbrs = nbrGenerator.getNBRs(lgg, negExampleTrees); + cnt = 1; + for(QueryTree<String> tree : nbrs){ + System.out.println("NBR " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + } + + @Test + public void computeSingleNBRWithTaggingNonSubsumingParts(){ + List<QueryTree<String>> posExampleTrees = DBpediaExample.getPosExampleTrees(); + List<QueryTree<String>> negExampleTrees = DBpediaExample.getNegExampleTrees(); + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + NBRGenerator<String> nbrGenerator = new NBRGeneratorImpl<String>(new TagNonSubsumingPartsNBRStrategy<String>()); + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ + System.out.println("POSITIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + System.out.println(lgg.getStringRepresentation()); + + System.out.println("-----------------------------------------------"); + + cnt = 1; + for(QueryTree<String> tree : negExampleTrees){ + System.out.println("NEGATIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> nbr = nbrGenerator.getNBR(lgg, negExampleTrees); + + System.out.println("NBR"); + System.out.println(nbr.getStringRepresentation()); + } + + @Test + public void computeSingleNBRGreedy(){ + List<QueryTree<String>> posExampleTrees = DBpediaExample.getPosExampleTrees(); + List<QueryTree<String>> negExampleTrees = DBpediaExample.getNegExampleTrees(); + + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + 
NBRGenerator<String> nbrGenerator = new NBRGeneratorImpl<String>(new GreedyNBRStrategy<String>()); + + int cnt = 1; + for(QueryTree<String> tree : posExampleTrees){ + System.out.println("POSITIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); + + System.out.println("LGG"); + System.out.println(lgg.getStringRepresentation()); + + System.out.println("-----------------------------------------------"); + + cnt = 1; + for(QueryTree<String> tree : negExampleTrees){ + System.out.println("NEGATIVE EXAMPLE TREE " + cnt); + System.out.println(tree.getStringRepresentation()); + System.out.println("-----------------------------------------------"); + cnt++; + } + + QueryTree<String> nbr = nbrGenerator.getNBR(lgg, negExampleTrees); + + System.out.println("NBR"); + System.out.println(nbr.getStringRepresentation()); + } + +} Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/TreeSubsumptionTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/TreeSubsumptionTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/TreeSubsumptionTest.java 2011-04-05 08:29:44 UTC (rev 2750) @@ -0,0 +1,67 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.algorithm.qtl; + +import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import org.junit.Assert; +import org.junit.Test; + + + +/** + * + * @author Lorenz Bühmann + * + */ +public class TreeSubsumptionTest{ + + @Test + public void test1(){ + QueryTreeImpl<String> tree1 = new QueryTreeImpl<String>("A"); + QueryTreeImpl<String> tree2 = new QueryTreeImpl<String>("?"); + Assert.assertTrue(tree1.isSubsumedBy(tree2)); + } + + @Test + public void test2(){ + QueryTreeImpl<String> tree1 = new QueryTreeImpl<String>("A"); + tree1.addChild(new QueryTreeImpl<String>("B"), "r"); + + QueryTreeImpl<String> tree2 = new QueryTreeImpl<String>("?"); + QueryTreeImpl<String> child = new QueryTreeImpl<String>("A"); + child.addChild(new QueryTreeImpl<String>("B"), "r"); + tree2.addChild(child, "r"); + Assert.assertFalse(tree1.isSubsumedBy(tree2)); + } + + @Test + public void test3(){ + QueryTreeImpl<String> tree1 = new QueryTreeImpl<String>("?"); + tree1.addChild(new QueryTreeImpl<String>("B"), "r"); + tree1.addChild(new QueryTreeImpl<String>("A"), "s"); + + QueryTreeImpl<String> tree2 = new QueryTreeImpl<String>("?"); + tree2.addChild(new QueryTreeImpl<String>("A"), "r"); + tree2.addChild(new QueryTreeImpl<String>("B"), "r"); + tree2.addChild(new QueryTreeImpl<String>("C"), "s"); + Assert.assertFalse(tree2.isSubsumedBy(tree1)); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-05-12 14:17:55
|
Revision: 2798 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2798&view=rev Author: lorenz_b Date: 2011-05-12 14:17:45 +0000 (Thu, 12 May 2011) Log Message: ----------- Added some NLP stuff for POS, Lemmatization and NER. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Lemmatizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/NER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Tokenizer.java trunk/components-ext/src/main/resources/tbsl/models/en-pos-maxent.bin trunk/components-ext/src/main/resources/tbsl/models/en-token.bin trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/LemmatizationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-05-12 11:57:08 UTC (rev 2797) +++ trunk/components-ext/pom.xml 2011-05-12 14:17:45 UTC (rev 2798) @@ 
-91,10 +91,21 @@ <groupId>com.hp.hpl.jena</groupId> <artifactId>arq</artifactId> </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <scope>test</scope> - </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-tools</artifactId> + <version>1.5.1-incubating</version> + </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-maxent</artifactId> + <version>3.0.1-incubating</version> + </dependency> + </dependencies> </project> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,77 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import opennlp.tools.postag.POSModel; +import opennlp.tools.postag.POSTaggerME; +import opennlp.tools.util.Sequence; + +public class ApachePartOfSpeechTagger implements PartOfSpeechTagger{ + + private POSTaggerME tagger; + private static final String MODEL_PATH = "src/main/resources/tbsl/models/en-pos-maxent.bin"; + + private Tokenizer tokenizer; + + public ApachePartOfSpeechTagger() { + + InputStream modelIn = null; + POSModel model = null; + try { + modelIn = new FileInputStream(MODEL_PATH); + model = new POSModel(modelIn); + } + catch (IOException e) { + // Model loading failed, handle the error + e.printStackTrace(); + } + finally { + if (modelIn != null) { + try { + modelIn.close(); + } + catch 
(IOException e) { + } + } + } + + tagger = new POSTaggerME(model); + + tokenizer = new ApacheTokenizer(); + } + + @Override + public String tag(String sentence) { + String[] tokens = tokenizer.tokenize(sentence); + String[] tags = tagger.tag(tokens); + + return convert2TaggedSentence(tokens, tags); + } + + @Override + public List<String> tagTopK(String sentence) { + List<String> taggedSentences = new ArrayList<String>(); + String[] tokens = tokenizer.tokenize(sentence); + Sequence[] sequences = tagger.topKSequences(tokens); + for(Sequence s : sequences){ + taggedSentences.add(convert2TaggedSentence(tokens, (String[])s.getOutcomes().toArray(new String[s.getOutcomes().size()]))); + } + return taggedSentences; + } + + private String convert2TaggedSentence(String[] words, String[] tags){ + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < words.length; i++){ + sb.append(words[i]).append("/").append(tags[i]); + if(i < words.length-1){ + sb.append(" "); + } + } + return sb.toString().trim(); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,42 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import opennlp.tools.tokenize.TokenizerME; +import opennlp.tools.tokenize.TokenizerModel; + +public class ApacheTokenizer implements Tokenizer{ + + private opennlp.tools.tokenize.Tokenizer tokenizer; + 
private static final String MODEL_FILE = "src/main/resources/tbsl/models/en-token.bin"; + + public ApacheTokenizer() { + InputStream modelIn = null; + TokenizerModel model = null; + try { + modelIn = new FileInputStream(MODEL_FILE); + model = new TokenizerModel(modelIn); + } + catch (IOException e) { + e.printStackTrace(); + } + finally { + if (modelIn != null) { + try { + modelIn.close(); + } + catch (IOException e) { + } + } + } + tokenizer = new TokenizerME(model); + } + + @Override + public String[] tokenize(String sentence) { + return tokenizer.tokenize(sentence); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,76 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.List; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class DBpediaSpotlightNER implements NER{ + + private static final String SERVICE_URL = "http://spotlight.dbpedia.org/rest/annotate?"; + + /* + * Confidence (range 0-1): how reliable is the disambiguation? Regulates precision and recall. 
+ */ + private static final double CONFIDENCE = 0.4; + + /* + * Support (range 0-infinity): do you want only "common" concepts? + * Specifies the number of Wikipedia inlinks that a concept must have in order to be annotated. + */ + private static final int SUPPORT = 20; + + public DBpediaSpotlightNER(){ + + } + + @Override + public List<String> getNamedEntitites(String sentence) { + List<String> namedEntities = new ArrayList<String>(); + try { + URL url = new URL(SERVICE_URL + "text=" + URLEncoder.encode(sentence, "UTF-8") + "&confidence=" + CONFIDENCE + "&support=" + SUPPORT); + URLConnection conn = url.openConnection ();conn.setRequestProperty("accept", "application/json"); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuffer sb = new StringBuffer(); + String line; + while ((line = rd.readLine()) != null) { + sb.append(line); + } + rd.close(); + JSONObject json = new JSONObject(sb.toString()); + JSONArray array = json.getJSONArray("Resources"); + JSONObject entityObject; + for(int i = 0; i < array.length(); i++){ + entityObject = array.getJSONObject(i); + System.out.println("Entity: " + entityObject.getString("@surfaceForm")); + System.out.println("DBpedia URI: " + entityObject.getString("@URI")); + System.out.println("Types: " + entityObject.getString("@types")); + namedEntities.add(entityObject.getString("@surfaceForm")); + + } + + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (JSONException e) { + e.printStackTrace(); + } + return namedEntities; + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Lemmatizer.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Lemmatizer.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Lemmatizer.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.List; + +public interface Lemmatizer { + + /** + * Lemmatize the word. + * @param word the word to lemmatize + * @return the stemmed word + */ + String stem(String word); + + /** + * Lemmatize the word, being sensitive to the tag. + * @param word the word to lemmatize + * @param tag + * @return the stemmed word + */ + String stem(String word, String tag); + + /** + * Lemmatize a list of words. The result will be in the same order as the input. + * @param words the words to lemmatize + * @return a list of stemmed words in the same order as the input list + */ + List<String> stem(List<String> words); + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Lemmatizer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/NER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/NER.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/NER.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,9 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.List; + +public interface NER { + + List<String> getNamedEntitites(String sentence); + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/NER.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,11 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.List; + +public interface PartOfSpeechTagger { + + String tag(String sentence); + + List<String> tagTopK(String sentence); + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,42 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.ArrayList; +import java.util.List; + +import edu.stanford.nlp.process.Morphology; + +/* + * Morphology computes the base form of English words, by removing just inflections (not derivational morphology). + * That is, it only does noun plurals, pronoun case, and verb endings, and not things like comparative adjectives or + * derived nominals. It is based on a finite-state transducer implemented by John Carroll et al., written in flex + * and publicly available. See: http://www.informatics.susx.ac.uk/research/nlp/carroll/morph.html . 
+ */ +public class StanfordLemmatizer implements Lemmatizer{ + + private Morphology stemmer; + + public StanfordLemmatizer(){ + stemmer = new Morphology(); + } + + @Override + public String stem(String word) { + return stemmer.stem(word); + } + + @Override + public String stem(String word, String tag) { + return stemmer.stem(word, tag).word(); + } + + @Override + public List<String> stem(List<String> words) { + List<String> stemmedWords = new ArrayList<String>(); + for(String word : words){ + stemmedWords.add(stem(word)); + } + return stemmedWords; + } + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,53 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import edu.stanford.nlp.ling.HasWord; +import edu.stanford.nlp.ling.TaggedWord; +import edu.stanford.nlp.tagger.maxent.MaxentTagger; + +public class StanfordPartOfSpeechTagger implements PartOfSpeechTagger{ + + private MaxentTagger tagger; + + public StanfordPartOfSpeechTagger(){ + try { +// String modelPath = ClassLoader.getSystemResource("tbsl/models/bidirectional-distsim-wsj-0-18.tagger").toString(); + String modelPath = "src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; + tagger = new MaxentTagger(modelPath); + } catch (IOException e) { + 
e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + + @Override + public String tag(String sentence) { + String out = ""; + + ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + + StringReader reader = new StringReader(sentence); + List<ArrayList<? extends HasWord>> text = tagger.tokenizeText(reader); + + if (text.size() == 1) { + tagged = tagger.processSentence(text.get(0)); + } + + for (TaggedWord t : tagged) { + out += " " + t.toString(); + } + return out.trim(); + } + + @Override + public List<String> tagTopK(String sentence) { + return Collections.singletonList(tag(sentence)); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Tokenizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Tokenizer.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Tokenizer.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,8 @@ +package org.dllearner.algorithm.tbsl.nlp; + + +public interface Tokenizer { + + String[] tokenize(String sentence); + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/Tokenizer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-12 11:57:08 UTC (rev 2797) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-12 14:17:45 
UTC (rev 2798) @@ -1,6 +1,5 @@ package org.dllearner.algorithm.tbsl.templator; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -14,6 +13,8 @@ import org.dllearner.algorithm.tbsl.ltag.parser.LTAG_Lexicon_Constructor; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; @@ -25,7 +26,7 @@ String[] GRAMMAR_FILES = {"src/main/resources/tbsl/lexicon/english.lex"}; - POStagger tagger; + PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); Parser p; @@ -35,14 +36,7 @@ g = LTAG_Constructor.construct(Arrays.asList(GRAMMAR_FILES)); - tagger = null; - try { - tagger = new POStagger(); - } catch (IOException e1) { - e1.printStackTrace(); - } catch (ClassNotFoundException e1) { - e1.printStackTrace(); - } + tagger = new StanfordPartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; Added: trunk/components-ext/src/main/resources/tbsl/models/en-pos-maxent.bin =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/models/en-pos-maxent.bin ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: trunk/components-ext/src/main/resources/tbsl/models/en-token.bin =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/models/en-token.bin ___________________________________________________________________ Added: svn:mime-type + 
application/octet-stream Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/LemmatizationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/LemmatizationTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/LemmatizationTest.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,13 @@ +package org.dllearner.algorithm.tbsl; + +public class LemmatizationTest { + + /** + * @param args + */ + public static void main(String[] args) { + // TODO Auto-generated method stub + + } + +} Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/LemmatizationTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -0,0 +1,19 @@ +package org.dllearner.algorithm.tbsl; + +import org.dllearner.algorithm.tbsl.nlp.DBpediaSpotlightNER; +import org.dllearner.algorithm.tbsl.nlp.NER; + +public class NERTest { + + /** + * @param args + */ + public static void main(String[] args) { + String sentence = "When did Nirvana record Nevermind?"; + + NER ner = new DBpediaSpotlightNER(); + System.out.println(ner.getNamedEntitites(sentence)); + + } + +} Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java =================================================================== --- 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-12 11:57:08 UTC (rev 2797) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -1,34 +1,39 @@ package org.dllearner.algorithm.tbsl; import java.io.IOException; +import java.util.List; -import org.dllearner.algorithm.tbsl.templator.POStagger; +import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; public class POStest { public static void main(String[] args) throws IOException, ClassNotFoundException { - - POStagger tagger = new POStagger(); - String sentence = "When did Nirvana record Nevermind?"; + PartOfSpeechTagger tagger = new StanfordPartOfSpeechTagger(); + long startTime = System.currentTimeMillis(); String tagged = tagger.tag(sentence); + System.out.format("Tagged sentence with Stanford tagger (%d ms):\n", System.currentTimeMillis()-startTime); + System.out.println(tagged + "\n"); - System.out.println(tagged); -// Tagger tagger = new Tagger("en"); -// -// String s = ""; -// -// String[] words = s.split(" "); -// String[] tagged; -// -// tagged = tagger.tag(words); -// -// for (String string : tagged) { -// System.out.println(string); -// } + tagger = new ApachePartOfSpeechTagger(); + startTime = System.currentTimeMillis(); + tagged = tagger.tag(sentence); + System.out.format("Tagged sentence with Apache OpenNLP (%d ms):\n", System.currentTimeMillis()-startTime); + startTime = System.currentTimeMillis(); + System.out.println(tagged + "\n"); + + + startTime = System.currentTimeMillis(); + List<String> topKTaggedSentences = tagger.tagTopK(sentence); + System.out.format("Top k tags with Apache OpenNLP (%d ms):\n", System.currentTimeMillis()-startTime); + for(String t : topKTaggedSentences){ + System.out.println(t); + } } } Modified: 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-12 11:57:08 UTC (rev 2797) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-12 14:17:45 UTC (rev 2798) @@ -1,7 +1,11 @@ package org.dllearner.algorithm.tbsl; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -18,6 +22,7 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; + public class TemplateGenerationTest { private static List<String> readQuestions(File file){ @@ -57,6 +62,9 @@ File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); List<String> questions = readQuestions(file); + StringBuilder successful = new StringBuilder(); + StringBuilder failed = new StringBuilder(); + Templator templateGenerator = new Templator(); int cnt = 0; for(String question : questions){ @@ -65,16 +73,48 @@ Set<Template> templates = templateGenerator.buildTemplates(question); if(!templates.isEmpty()){ cnt++; + successful.append("*****************************************************************\n"); + successful.append(question).append("\n"); + } else { + failed.append(question).append("\n"); } for(Template t : templates){ + successful.append(t); System.out.println(t); } } catch (Exception e) { + failed.append(question).append("\n"); e.printStackTrace(); } } System.out.println("Could generate templates for " + cnt + "/" + questions.size() + " questions."); + + System.out.println(successful); + System.out.println(failed); + + try { + Writer out = new OutputStreamWriter(new FileOutputStream( + "successful.txt")); + Writer out2 = new 
OutputStreamWriter(new FileOutputStream( + "failed.txt")); + try { + out.write(successful.toString()); + out2.write(failed.toString()); + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + out.close(); + out2.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-05-13 09:21:30
|
Revision: 2803 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2803&view=rev Author: lorenz_b Date: 2011-05-13 09:21:24 +0000 (Fri, 13 May 2011) Log Message: ----------- Added new NER using Lingpipe API and a local DBpedia dictionary. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java trunk/components-ext/src/main/resources/tbsl/models/dbpedia_lingpipe.dictionary Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-05-13 08:19:14 UTC (rev 2802) +++ trunk/components-ext/pom.xml 2011-05-13 09:21:24 UTC (rev 2803) @@ -14,6 +14,10 @@ <id>Simmetrics</id> <url>http://maven.mse.jhu.edu/m2repository/</url> </repository> + <repository> + <id>Harvard Med</id> + <url>http://repo.open.med.harvard.edu/nexus/content/repositories/public/</url> + </repository> </repositories> <parent> @@ -106,6 +110,11 @@ <artifactId>opennlp-maxent</artifactId> <version>3.0.1-incubating</version> </dependency> + <dependency> + <groupId>com.aliasi</groupId> + <artifactId>lingpipe</artifactId> + <version>4.0.1</version> + </dependency> </dependencies> </project> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-05-13 09:21:24 UTC (rev 2803) @@ -0,0 +1,51 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.aliasi.chunk.Chunk; +import com.aliasi.chunk.Chunker; +import com.aliasi.chunk.Chunking; +import 
com.aliasi.dict.Dictionary; +import com.aliasi.dict.ExactDictionaryChunker; +import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; +import com.aliasi.util.AbstractExternalizable; + +public class LingPipeNER implements NER{ + + private static final String DICTIONARY_PATH = "src/main/resources/tbsl/models/dbpedia_lingpipe.dictionary"; + + private Chunker ner; + + public LingPipeNER() { + this(true, true); + } + + public LingPipeNER(boolean caseSensitive) { + this(caseSensitive, true); + } + + public LingPipeNER(boolean caseSensitive, boolean allMatches) { + try { + Dictionary<String> dictionary = (Dictionary<String>) AbstractExternalizable.readObject(new File(DICTIONARY_PATH)); + ner = new ExactDictionaryChunker(dictionary, IndoEuropeanTokenizerFactory.INSTANCE, allMatches, caseSensitive); + } catch (IOException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + + @Override + public List<String> getNamedEntitites(String sentence) { + List<String> namedEntities = new ArrayList<String>(); + Chunking chunking = ner.chunk(sentence); + for(Chunk chunk : chunking.chunkSet()){ + namedEntities.add(sentence.substring(chunk.start(), chunk.end())); + } + return namedEntities; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/resources/tbsl/models/dbpedia_lingpipe.dictionary =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/models/dbpedia_lingpipe.dictionary ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 
=================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 2011-05-13 08:19:14 UTC (rev 2802) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 2011-05-13 09:21:24 UTC (rev 2803) @@ -1,6 +1,9 @@ package org.dllearner.algorithm.tbsl; +import java.util.List; + import org.dllearner.algorithm.tbsl.nlp.DBpediaSpotlightNER; +import org.dllearner.algorithm.tbsl.nlp.LingPipeNER; import org.dllearner.algorithm.tbsl.nlp.NER; public class NERTest { @@ -12,8 +15,17 @@ String sentence = "When did Nirvana record Nevermind?"; NER ner = new DBpediaSpotlightNER(); - System.out.println(ner.getNamedEntitites(sentence)); + long startTime = System.currentTimeMillis(); + List<String> namedEntities = ner.getNamedEntitites(sentence); + System.out.format("Using DBpedia Spotlight WebService (%d ms):\n", System.currentTimeMillis()-startTime); + System.out.println(namedEntities + "\n"); + ner = new LingPipeNER(); + startTime = System.currentTimeMillis(); + namedEntities = ner.getNamedEntitites(sentence); + System.out.format("Using Lingpipe API with local DBpedia dictionary (%d ms):\n", System.currentTimeMillis()-startTime); + System.out.println(namedEntities); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-05-23 10:55:42
|
Revision: 2815 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2815&view=rev Author: christinaunger Date: 2011-05-23 10:55:34 +0000 (Mon, 23 May 2011) Log Message: ----------- [tbsl] comparatives and superlatives added (plus small bug fixes) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Negation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Prefix.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Added Paths: ----------- trunk/components-ext/failed.txt trunk/components-ext/src/main/resources/tbsl/lexicon/adj_list.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/AdjPolarityTest.java trunk/components-ext/successful.txt Added: trunk/components-ext/failed.txt =================================================================== --- trunk/components-ext/failed.txt (rev 0) +++ trunk/components-ext/failed.txt 2011-05-23 10:55:34 UTC (rev 2815) @@ -0,0 +1,19 @@ +Which presidents were born in 1945? +Who developed the video game World of Warcraft? 
+List all episodes of the first season of the HBO television series The Sopranos! +Which people have as their given name Jimmy? +Is there a video game called Battle Chess? +Which companies work in the aerospace industry as well as on nuclear reactor technology? +Which country does the Airedale Terrier come from? +Which cities have more than 2 million inhabitants? +Who was Tom Hanks married to? +Which capitals in Europe were host cities of the summer olympic games? +Who has been the 5th president of the United States of America? +Which music albums contain the song Last Christmas? +Which genre does DBpedia belong to? +In which programming language is GIMP written? +In which films did Julia Roberts as well as Richard Gere play? +Who wrote the book The pillars of the Earth? +How many films did Leonardo DiCaprio star in? +Which organizations were founded in 1950? +Is Natalie Portman an actress? Property changes on: trunk/components-ext/failed.txt ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -213,15 +213,24 @@ if (predicate.startsWith("SLOT")) { for (Slot s : slots) { if (s.getAnchor().equals(predicate)) { + s.setToken(predicate); predicate = "p" + createFresh(); s.setAnchor(predicate); template.addSlot(s); break; } + else if (s.getToken().equals(predicate)) { + predicate = s.getAnchor(); + } } } SPARQL_Property prop = new SPARQL_Property(predicate); prop.setIsVariable(true); + + boolean noliteral = true; + if (simple.getArguments().size() > 1 && 
simple.getArguments().get(1).getValue().matches("\\d+")) { + noliteral = false; + } if (predicate.equals("count")) { // COUNT(?x) AS ?c @@ -234,41 +243,45 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue()), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), SPARQL_PairType.GT))); return query; } else if (predicate.equals("greaterorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue()), - SPARQL_PairType.LT))); + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.GTEQ))); return query; } else if (predicate.equals("less")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue()), - SPARQL_PairType.LTEQ))); + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.LT))); return query; } else if (predicate.equals("lessorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue()), - SPARQL_PairType.GT))); + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.LTEQ))); return query; } else if (predicate.equals("maximum")) { - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.MAX)); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),true)); + query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.DESC)); + query.setLimit(1); return query; } else if (predicate.equals("minimum")) { - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), 
SPARQL_Aggregate.MIN)); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),true)); + query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.ASC)); + query.setLimit(1); return query; } else if (predicate.equals("equal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue()), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), SPARQL_PairType.EQ))); return query; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -124,7 +124,6 @@ */ try { - TreeNode tree = c.construct("NUM:'" + token + "'"); int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree), @@ -241,12 +240,16 @@ try { for (String[] entry : entries) { String anchor = entry[0]; - String treestring = entry[1]; - String dude = entry[2]; + String treestring = entry[1]; + List<String> dudeStrings = new ArrayList<String>(); + for (String s : entry[2].trim().split(";;")) { + if (!s.equals("")) { + dudeStrings.add(s.trim()); + } + } TreeNode tree = c.construct(treestring); - int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(anchor,tree), - Collections.singletonList(dude)); + int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(anchor,tree), dudeStrings); add(parseG, tree, gid-1, localID); temps.add(gid-1); localID++; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -356,4 +356,70 @@ } } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((conditions == null) ? 0 : conditions.hashCode()); + result = prime * result + ((filter == null) ? 0 : filter.hashCode()); + result = prime * result + limit; + result = prime * result + offset; + result = prime * result + ((orderBy == null) ? 0 : orderBy.hashCode()); + result = prime * result + + ((prefixes == null) ? 0 : prefixes.hashCode()); + result = prime * result + ((qt == null) ? 0 : qt.hashCode()); + result = prime * result + + ((selTerms == null) ? 0 : selTerms.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Query other = (Query) obj; + if (conditions == null) { + if (other.conditions != null) + return false; + } else if (!conditions.equals(other.conditions)) + return false; + if (filter == null) { + if (other.filter != null) + return false; + } else if (!filter.equals(other.filter)) + return false; + if (limit != other.limit) + return false; + if (offset != other.offset) + return false; + if (orderBy == null) { + if (other.orderBy != null) + return false; + } else if (!orderBy.equals(other.orderBy)) + return false; + if (prefixes == null) { + if (other.prefixes != null) + return false; + } else if (!prefixes.equals(other.prefixes)) + return false; + if (qt == null) { + if (other.qt != null) + return false; + } else if (!qt.equals(other.qt)) + return false; + if (selTerms == null) { + if (other.selTerms != null) + return false; + } else if (!selTerms.equals(other.selTerms)) + return false; + return true; + } + + + } Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Negation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Negation.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Negation.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -17,4 +17,31 @@ return retVal; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((term == null) ? 0 : term.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Negation other = (SPARQL_Negation) obj; + if (term == null) { + if (other.term != null) + return false; + } else if (!term.equals(other.term)) + return false; + return true; + } + + + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -46,4 +46,43 @@ return ""; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((a == null) ? 0 : a.hashCode()); + result = prime * result + ((b == null) ? 0 : b.hashCode()); + result = prime * result + ((type == null) ? 
0 : type.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Pair other = (SPARQL_Pair) obj; + if (a == null) { + if (other.a != null) + return false; + } else if (!a.equals(other.a)) + return false; + if (b == null) { + if (other.b != null) + return false; + } else if (!b.equals(other.b)) + return false; + if (type == null) { + if (other.type != null) + return false; + } else if (!type.equals(other.type)) + return false; + return true; + } + + + } \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Prefix.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Prefix.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Prefix.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -28,5 +28,35 @@ this.name = name; this.url = url; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((name == null) ? 0 : name.hashCode()); + result = prime * result + ((url == null) ? 
0 : url.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Prefix other = (SPARQL_Prefix) obj; + if (name == null) { + if (other.name != null) + return false; + } else if (!name.equals(other.name)) + return false; + if (url == null) { + if (other.url != null) + return false; + } else if (!url.equals(other.url)) + return false; + return true; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -33,6 +33,29 @@ } return prefix.getName()+":"+name; } + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((prefix == null) ? 
0 : prefix.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Property other = (SPARQL_Property) obj; + if (prefix == null) { + if (other.prefix != null) + return false; + } else if (!prefix.equals(other.prefix)) + return false; + return true; + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -56,4 +56,46 @@ this.value = value; this.optional = optional; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (optional ? 1231 : 1237); + result = prime * result + + ((property == null) ? 0 : property.hashCode()); + result = prime * result + ((value == null) ? 0 : value.hashCode()); + result = prime * result + + ((variable == null) ? 
0 : variable.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Triple other = (SPARQL_Triple) obj; + if (optional != other.optional) + return false; + if (property == null) { + if (other.property != null) + return false; + } else if (!property.equals(other.property)) + return false; + if (value == null) { + if (other.value != null) + return false; + } else if (!value.equals(other.value)) + return false; + if (variable == null) { + if (other.variable != null) + return false; + } else if (!variable.equals(other.variable)) + return false; + return true; + } + + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -36,4 +36,34 @@ } } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (isVariable ? 1231 : 1237); + result = prime * result + ((name == null) ? 
0 : name.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SPARQL_Value other = (SPARQL_Value) obj; + if (isVariable != other.isVariable) + return false; + if (name == null) { + if (other.name != null) + return false; + } else if (!name.equals(other.name)) + return false; + return true; + } + + + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -7,17 +7,20 @@ public class Slot { String anchor; + String token; SlotType type; List<String> words; public Slot(String a,List<String> ws) { anchor = a; + token = ""; type = SlotType.UNSPEC; words = ws; replaceUnderscores(); } public Slot(String a,SlotType t,List<String> ws) { anchor = a; + token = ""; type = t; words = ws; replaceUnderscores(); @@ -37,6 +40,12 @@ public void setAnchor(String s) { anchor = s; } + public String getToken() { + return token; + } + public void setToken(String t) { + token = t; + } public List<String> getWords() { return words; @@ -84,5 +93,47 @@ return new Slot(anchor,type,newWords); } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((anchor == null) ? 0 : anchor.hashCode()); + result = prime * result + ((token == null) ? 0 : token.hashCode()); + result = prime * result + ((type == null) ? 0 : type.hashCode()); + result = prime * result + ((words == null) ? 
0 : words.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Slot other = (Slot) obj; + if (anchor == null) { + if (other.anchor != null) + return false; + } else if (!anchor.equals(other.anchor)) + return false; + if (token == null) { + if (other.token != null) + return false; + } else if (!token.equals(other.token)) + return false; + if (type == null) { + if (other.type != null) + return false; + } else if (!type.equals(other.type)) + return false; + if (words == null) { + if (other.words != null) + return false; + } else if (!words.equals(other.words)) + return false; + return true; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -1,5 +1,7 @@ package org.dllearner.algorithm.tbsl.templator; +import java.io.BufferedReader; +import java.io.FileReader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -58,10 +60,11 @@ String tokenfluent = token.replaceAll(" ",""); String slotX = "x/" + type + "/"; String slotP = "SLOT_" + tokenfluent + "/" + type + "/"; + String slotC = "SLOT_" + tokenfluent + "/CLASS/"; for (Iterator<String> i = words.iterator(); i.hasNext();) { String next = i.next().replaceAll(" ","_"); - slotX += next; slotP += next; - if (i.hasNext()) { slotX += "^"; slotP += "^"; } + slotX += next; slotP += next; slotC += next; + if (i.hasNext()) { slotX += "^"; slotP += "^"; slotC += "^"; } } String treetoken = "N:'" + token.toLowerCase() + "'"; if (token.trim().contains(" ")) { @@ -96,31 
+99,21 @@ result.add(dpEntry2); } else if (pos.equals("NPREP")) { - /* DP */ - String[] dpEntry1a = {token, + String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]>"}; - String[] dpEntry1b = {token, - "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; - String[] dpEntry2a = {token, + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]> ;; " + + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotC + "," + "SLOT_of/PROPERTY/" + "]>"}; + String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "]>"}; - String[] dpEntry2b = {token, - "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; - String[] npEntry1 = {token, + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "]> ;; " + + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotC + "," + "SLOT_of/PROPERTY/" + "]>"}; + String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]>"}; - String[] npEntry2 = {token, - "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP 
+ "," + "SLOT_of/PROPERTY/" + "]>"}; - result.add(dpEntry1a); - result.add(dpEntry1b); - result.add(dpEntry2a); - result.add(dpEntry2b); - result.add(npEntry1); - result.add(npEntry2); + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]> ;; " + + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotC + "," + "SLOT_of/PROPERTY/" + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); + result.add(npEntry); } } @@ -245,11 +238,41 @@ } /* COMPARATIVE */ else if (pos.equals("JJR")) { - // TODO polarity not given, reference value not determinable + String pol = polarity(token); + String comp; + if (pol.equals("POS")) { + comp = "greater"; + } else { comp = "less"; } + + String[] compEntry1 = {token, + "(ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj])", + "<x,l1,<e,t>,[ l1:[ j,i | SLOT_" + token + "(x,j), SLOT_" + token + "(y,i), " + comp + "(j,i) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + result.add(compEntry1); + String[] compEntry2 = {token, + "(NP NP* (ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj]))", + "<x,l1,<e,t>,[ l1:[ j,i | SLOT_" + token + "(x,j), SLOT_" + token + "(y,i), " + comp + "(j,i) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + result.add(compEntry2); } /* SUPERLATIVE */ else if (pos.equals("JJS")) { - // ditto + String pol = polarity(token); + String comp; + if (pol.equals("POS")) { + comp = "maximum"; + } else { comp = "minimum"; } + + String[] superEntry1 = {token, + "(DET DET:'the' ADJ:'" + token.toLowerCase() + "')", + "<x,l1,e,[ l1:[ x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + result.add(superEntry1); + String[] superEntry2 = {token, + "(DP (NP DET:'the' ADJ:'" + token.toLowerCase() + "'))", + "<x,l1,<<e,t>,t>,[ l1:[ x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + result.add(superEntry2); + String[] superEntry3 = {token, + "(DP (NP 
DET:'the' ADJ:'" + token.toLowerCase() + "' NP[noun]))", + "<x,l1,<<e,t>,t>,[ l1:[ x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[ (l2,x,noun,<e,t>) ],[l2=l1],["+slot+"]>"}; + result.add(superEntry3); } } /* PREPOSITIONS */ @@ -274,6 +297,27 @@ return false; } + private String polarity(String adj) { + + String polarity = "POS"; + + BufferedReader in; + try { + in = new BufferedReader(new FileReader("src/main/resources/tbsl/lexicon/adj_list.txt")); + String line; + while ((line = in.readLine()) != null ) { + if (line.contains(adj)) { + polarity = line.split(" ")[0]; + break; + } + } + in.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + return polarity; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-19 16:05:12 UTC (rev 2814) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -31,6 +31,7 @@ LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); Parser p; + boolean ONE_SCOPE_ONLY = true; public Templator() { @@ -67,45 +68,66 @@ } else { try { - for (TreeNode dtree : p.buildDerivedTrees(g)) { - if (!dtree.getAnchor().trim().equals(tagged.toLowerCase())) { - System.err.println("[Templator.java] Anchors don't match the input. (Nevermind...)"); - break; - } - } + p.buildDerivedTrees(g); +// for (TreeNode dtree : p.buildDerivedTrees(g)) { +// if (!dtree.getAnchor().trim().equals(tagged.toLowerCase())) { +// System.err.println("[Templator.java] Anchors don't match the input. 
(Nevermind...)"); +// break; +// } +// } } catch (ParseException e) { System.err.println("[Templator.java] ParseException at '" + e.getMessage() + "'"); } } - List<DRS> drses; + List<DRS> drses = new ArrayList<DRS>(); Set<Template> templates = new HashSet<Template>(); for (Dude dude : p.getDudes()) { -// System.out.println("DUDE: " + dude); // DEBUG + // System.out.println("DUDE: " + dude); // DEBUG UDRS udrs = d2u.convert(dude); if (udrs != null) { - drses = new ArrayList<DRS>(); - drses.addAll(udrs.initResolve()); - for (DRS drs : drses) { -// System.out.println("DRS: " + drs); // DEBUG - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); -// //DEBUG -// for (Slot sl : slots) { -// System.out.println(sl); -// } -// // - try { - Template temp = d2s.convert(drs,slots); - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } + for (DRS drs : udrs.initResolve()) { +// System.out.println(drs); // DEBUG + if (!drses.contains(drs)) { + drses.add(drs); + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); +// //DEBUG +// for (Slot sl : slots) { +// System.out.println(sl); +// } +// // + try { + Template temp = d2s.convert(drs,slots); + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + } + if (ONE_SCOPE_ONLY) { break; } } } } - + +// for (DRS drs : drses) { +//// System.out.println("DRS: " + drs); // DEBUG +// List<Slot> slots = new ArrayList<Slot>(); +// slots.addAll(dude.getSlots()); +//// //DEBUG +//// for (Slot sl : slots) { +//// System.out.println(sl); +//// } +//// // +// try { +// Template temp = d2s.convert(drs,slots); +// templates.add(temp); +// } catch (java.lang.ClassCastException e) { +// continue; +// } +// if (ONE_SCOPE_ONLY) { break; } +// } + if (clearAgain) { p.clear(g,p.getTemps()); } Added: trunk/components-ext/src/main/resources/tbsl/lexicon/adj_list.txt =================================================================== --- 
trunk/components-ext/src/main/resources/tbsl/lexicon/adj_list.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/adj_list.txt 2011-05-23 10:55:34 UTC (rev 2815) @@ -0,0 +1,269 @@ +POS able abler ablest +POS ample ampler amplest +POS angry angrier angriest +POS apt apter aptest +NEG bad worse worst +POS baggy baggier baggiest +POS bald balder baldest +POS beefy beefier beefiest +POS big bigger biggest +POS bitter bitterer bitterest +POS blind blinder blindest +POS blond blonder blondest +POS bloodthirsty bloodthirstier bloodthirstiest +POS bloody bloodier bloodiest +POS blue bluer bluest +POS blurry blurrier blurriest +POS bold bolder boldest +POS brave braver bravest +POS bright brighter brightest +POS busy busier busiest +POS calm calmer calmest +POS cheap cheaper cheapest +POS cheeky cheekier cheekiest +POS civil civiler civilest +POS clean cleaner cleanest +POS cleanly cleanlier cleanliest +POS clear clearer clearest +POS clever cleverer cleverest +POS close closer closest +POS cloudy cloudier cloudiest +POS clumpy clumpier clumpiest +POS clumsy clumsier clumsiest +POS coarse coarser coarsest +POS cold colder coldest +POS cool cooler coolest +POS costly costlier costliest +POS crafty craftier craftiest +POS cranky crankier crankiest +POS crazy crazier craziest +POS creamy creamier creamiest +POS crisp crisper crispest +POS crispy crispier crispiest +POS cruel crueler cruelest +POS curvy curvier curviest +POS cute cuter cutest +POS dark darker darkest +POS dead deader deadest +POS deaf deafer deafest +POS dear dearer dearest +POS deep deeper deepest +POS dense denser densest +POS dim dimmer dimmest +POS dirty dirtier dirtiest +POS dotty dottier dottiest +POS dry drier driest +POS dull duller dullest +POS dumb dumber dumbest +POS dusty dustier dustiest +POS early earlier earliest +POS easy easier easiest +POS edgy edgier edgiest +POS empty emptier emptiest +POS faint fainter faintest +POS fair fairer fairest +POS fancy fancier fanciest +POS far 
farther farthest +POS far further furthest +POS fast faster fastest +POS fat fatter fattest +POS fatty fattier fattiest +POS faulty faultier faultiest +NEG few fewer fewest +POS fierce fiercer fiercest +POS filthy filthier filthiest +POS fine finer finest +POS fit fitter fittest +POS flat flatter flattest +POS fond fonder fondest +POS foul fouler foulest +POS freaky freakier freakiest +POS fresh fresher freshest +POS friendly friendlier friendliest +POS full fuller fullest +POS funky funkier funkiest +POS funny funnier funniest +POS furry furrier furriest +POS fussy fussier fussiest +POS fuzzy fuzzier fuzziest +POS gentle gentler gentlest +POS ghastly ghastlier ghastliest +POS glad gladder gladdest +POS godly godlier godliest +POS good better best +POS goodly goodlier goodliest +POS grainy grainier grainiest +POS gray grayer grayest +POS great greater greatest +POS greedy greedier greediest +POS green greener greenest +POS grey greyer greyest +POS grim grimmer grimmest +POS gross grosser grossest +POS hairy hairier hairiest +POS handsome handsomer handsomest +POS handy handier handiest +POS happy happier happiest +POS hard harder hardest +POS hardy hardier hardiest +POS hasty hastier hastiest +POS healthy healthier healthiest +POS hearty heartier heartiest +POS heavy heavier heaviest +POS hefty heftier heftiest +POS high higher highest +POS hip hipper hippest +POS holy holier holiest +POS hot hotter hottest +POS huge huger hugest +POS humble humbler humblest +POS hungry hungrier hungriest +POS icy icier iciest +POS idle idler idlest +POS ill iller illest +POS jerky jerkier jerkiest +POS keen keener keenest +POS kind kinder kindest +POS lame lamer lamest +POS large larger largest +POS late later latest +POS lazy lazier laziest +POS likely likelier likeliest +NEG little littler littlest +POS lively livelier liveliest +POS lonely lonelier loneliest +POS long longer longest +POS loud louder loudest +POS lousy lousier lousiest +NEG low lower lowest +NEG lowly lowlier 
lowliest +POS lucky luckier luckiest +POS mean meaner meanest +POS merry merrier merriest +POS messy messier messiest +POS mighty mightier mightiest +POS mild milder mildest +POS naive naiver naivest +NEG narrow narrower narrowest +NEG near nearer nearest +POS neat neater neatest +POS new newer newest +POS nice nicer nicest +POS noble nobler noblest +POS noisy noisier noisiest +POS obscure obscurer obscurest +POS old elder eldest +POS old older oldest +POS pale paler palest +POS plain plainer plainest +POS plump plumper plumpest +POS polite politer politest +NEG poor poorer poorest +POS posh posher poshest +POS pretty prettier prettiest +POS proud prouder proudest +POS quick quicker quickest +NEG quiet quieter quietest +POS rainy rainier rainiest +POS rank ranker rankest +NEG rare rarer rarest +POS raw rawer rawest +POS real realer realest +POS red redder reddest +POS remote remoter remotest +POS rich richer richest +POS ripe riper ripest +POS risky riskier riskiest +POS rosy rosier rosiest +POS rough rougher roughest +POS rude ruder rudest +POS rusty rustier rustiest +POS sad sadder saddest +POS safe safer safest +POS sane saner sanest +POS scary scarier scariest +POS secure securer securest +POS severe severer severest +POS shaky shakier shakiest +NEG shallow shallower shallowest +POS sharp sharper sharpest +POS shiny shinier shiniest +NEG short shorter shortest +POS shy shyer shyest +POS sick sicker sickest +POS sickly sicklier sickliest +POS silly sillier silliest +POS simple simpler simplest +POS sleepy sleepier sleepiest +NEG slim slimmer slimmest +NEG slimy slimier slimiest +POS slippery slipperier slipperiest +NEG slow slower slowest +POS sly slier sliest +POS sly slyer slyest +NEG small smaller smallest +POS smart smarter smartest +POS smelly smellier smelliest +POS smoky smokier smokiest +POS snowy snowier snowiest +POS sober soberer soberest +POS soft softer softest +POS solid solider solidest +NEG soon sooner soonest +POS sound sounder soundest +POS 
sour sourer sourest +POS spare sparer sparest +NEG sparse sparser sparsest +POS speedy speedier speediest +POS spicy spicier spiciest +POS stale staler stalest +POS stark starker starkest +POS steady steadier steadiest +POS stealthy stealthier stealthiest +POS steep steeper steepest +POS sticky stickier stickiest +POS stiff stiffer stiffest +POS still stiller stillest +POS stony stonier stoniest +POS stormy stormier stormiest +POS strict stricter strictest +POS strong stronger strongest +POS stubborn stubborner stubbornest +POS stupid stupider stupidest +POS sunny sunnier sunniest +POS sure surer surest +POS sweet sweeter sweetest +POS swift swifter swiftest +POS talky talkier talkiest +POS tall taller tallest +POS tense tenser tensest +POS thick thicker thickest +NEG thin thinner thinnest +POS thirsty thirstier thirstiest +POS thready threadier threadiest +POS tidy tidier tidiest +NEG tight tighter tightest +NEG tiny tinier tiniest +POS tough tougher toughest +POS tricky trickier trickiest +POS true truer truest +POS trusty trustier trustiest +POS ugly uglier ugliest +POS unhealthy unhealthier unhealthiest +POS vague vaguer vaguest +POS warm warmer warmest +POS weak weaker weakest +POS wealthy wealthier wealthiest +POS weird weirder weirdest +POS wet wetter wettest +POS whiny whinier whiniest +POS white whiter whitest +POS wide wider widest +POS wild wilder wildest +POS windy windier windiest +POS wiry wirier wiriest +POS wise wiser wisest +POS wordy wordier wordiest +POS worldly worldlier worldliest +NEG young younger youngest +POS yummy yummier yummiest Property changes on: trunk/components-ext/src/main/resources/tbsl/lexicon/adj_list.txt ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/AdjPolarityTest.java =================================================================== --- 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/AdjPolarityTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/AdjPolarityTest.java 2011-05-23 10:55:34 UTC (rev 2815) @@ -0,0 +1,31 @@ +package org.dllearner.algorithm.tbsl; + +import java.io.BufferedReader; +import java.io.FileReader; + +public class AdjPolarityTest { + + public static void main(String[] args) { + + String polarity = "POS"; + + BufferedReader in; + try { + in = new BufferedReader(new FileReader("src/main/resources/tbsl/lexicon/adj_list.txt")); + String line; + while ((line = in.readLine()) != null ) { + if (line.contains("small")) { + polarity = line.split(" ")[0]; + break; + } + } + in.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + System.out.println(polarity); + + } + +} Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/AdjPolarityTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/successful.txt =================================================================== --- trunk/components-ext/successful.txt (rev 0) +++ trunk/components-ext/successful.txt 2011-05-23 10:55:34 UTC (rev 2815) @@ -0,0 +1,729 @@ +***************************************************************** +Give me all school types. +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p0 . +} + +>> SLOTS: +p0: CLASS {school types} +***************************************************************** +Who are the presidents of the United States? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . 
+} + +>> SLOTS: +v0: RESOURCE {United States} +p1: CLASS {presidents,president} +p2: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v0 ?p0 ?y . +} + +>> SLOTS: +v0: RESOURCE {United States} +p0: PROPERTY {presidents,president} +***************************************************************** +Who was the wife of President Lincoln? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . +} + +>> SLOTS: +v0: RESOURCE {President Lincoln} +p1: CLASS {wife} +p2: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v0 ?p0 ?y . +} + +>> SLOTS: +v0: RESOURCE {President Lincoln} +p0: PROPERTY {wife} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p5 ?v0 . + ?y rdf:type ?p4 . +} + +>> SLOTS: +v0: RESOURCE {President Lincoln} +p4: CLASS {wife} +p5: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v0 ?p3 ?y . +} + +>> SLOTS: +v0: RESOURCE {President Lincoln} +p3: PROPERTY {wife} +***************************************************************** +What is the official website of Tom Hanks? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p9 ?v0 . + ?y ?p8 ?j . + ?y rdf:type ?p7 . 
+} + +>> SLOTS: +v0: RESOURCE {Tom Hanks} +p7: CLASS {website,site} +p8: PROPERTY {} +p9: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v1 ?p1 ?y . + ?y ?p0 ?j . +} + +>> SLOTS: +v1: RESOURCE {Tom Hanks} +p0: PROPERTY {} +p1: PROPERTY {website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p2 . + ?y ?p4 ?j . + ?y ?p3 ?v1 . +} + +>> SLOTS: +v1: RESOURCE {Tom Hanks} +p2: CLASS {website,site} +p3: PROPERTY {} +p4: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p6 ?j . + ?v0 ?p5 ?y . +} + +>> SLOTS: +v0: RESOURCE {Tom Hanks} +p5: PROPERTY {website,site} +p6: PROPERTY {} +***************************************************************** +Who produced the most films? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT COUNT(?y) AS c2 WHERE { + ?x ?p1 ?y . + ?y rdf:type ?p0 . +} +ORDER BY DESC(?c2) +LIMIT 1 OFFSET 0 + +>> SLOTS: +p0: CLASS {films,movie,film,picture,pic} +p1: PROPERTY {produced} +***************************************************************** +Which mountains are higher than the Nanga Parbat? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p1 ?j . + ?v0 ?p1 ?i . + ?y rdf:type ?p0 . + FILTER(?j > ?i) . +} + +>> SLOTS: +v0: RESOURCE {Nanga Parbat} +p0: CLASS {mountains,mountain,mount} +p1: PROPERTY {degree} +***************************************************************** +Who created English Wikipedia? 
+>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x ?p0 ?y . +} + +>> SLOTS: +y: RESOURCE {English Wikipedia} +p0: PROPERTY {created} +***************************************************************** +Give me all actors starring in Batman Begins. +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p1 ?v1 . + ?y rdf:type ?p0 . +} + +>> SLOTS: +p0: CLASS {actors,actor,histrion,player,thespian} +p1: PROPERTY {starring} +***************************************************************** +Which software has been developed by organizations founded in California? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?y ?p0 ?v1 . + ?x rdf:type ?p2 . + ?y ?p1 ?x . + ?y rdf:type ?p3 . +} + +>> SLOTS: +v1: RESOURCE {California} +p0: PROPERTY {founded} +p1: PROPERTY {developed} +p2: CLASS {software,package} +p3: CLASS {organizations,organization,organisation} +***************************************************************** +Is Christian Bale starring in Batman Begins? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +ASK WHERE { + ?y ?p0 ?v0 . +} + +>> SLOTS: +y: RESOURCE {Christian Bale} +v0: RESOURCE {Batman Begins} +p0: PROPERTY {starring} +***************************************************************** +Give me the websites of companies with more than 500000 employees. +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?v1 rdf:type ?p34 . + ?y ?p31 ?v1 . + ?v0 rdf:type ?p32 . + ?y rdf:type ?p35 . + ?v1 ?p33 ?v2 . + FILTER(?c > 500000) . 
+} + +>> SLOTS: +p31: PROPERTY {} +p32: CLASS {employees,employee} +p33: PROPERTY {} +p34: CLASS {companies,company} +p35: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?y ?p13 ?v2 . + ?y rdf:type ?p17 . + ?y ?p14 ?v1 . + ?v2 rdf:type ?p16 . + ?v0 rdf:type ?p15 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p13: PROPERTY {} +p14: PROPERTY {} +p15: CLASS {employees,employee} +p16: CLASS {companies,company} +p17: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?y ?p6 ?v3 . + ?y ?p4 ?v1 . + ?y rdf:type ?p8 . + ?v0 rdf:type ?p5 . + ?v3 rdf:type ?p7 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p4: PROPERTY {} +p5: CLASS {employees,employee} +p6: PROPERTY {} +p7: CLASS {companies,company} +p8: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?v1 ?p29 ?v2 . + ?v0 rdf:type ?p27 . + ?v1 ?p28 ?y . + ?v1 rdf:type ?p30 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p27: CLASS {employees,employee} +p28: PROPERTY {websites,website,site} +p29: PROPERTY {} +p30: CLASS {companies,company} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?y ?p0 ?v1 . + ?v3 ?p3 ?y . + ?v3 rdf:type ?p2 . + ?v0 rdf:type ?p1 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p0: PROPERTY {} +p1: CLASS {employees,employee} +p2: CLASS {companies,company} +p3: PROPERTY {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?v1 ?p20 ?v2 . 
+ ?v1 rdf:type ?p21 . + ?v0 rdf:type ?p18 . + ?v1 ?p19 ?y . + FILTER(?c > 500000) . +} + +>> SLOTS: +p18: CLASS {employees,employee} +p19: PROPERTY {websites,website,site} +p20: PROPERTY {} +p21: CLASS {companies,company} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?v1 rdf:type ?p25 . + ?v1 ?p24 ?v2 . + ?y rdf:type ?p26 . + ?y ?p22 ?v1 . + ?v0 rdf:type ?p23 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p22: PROPERTY {} +p23: CLASS {employees,employee} +p24: PROPERTY {} +p25: CLASS {companies,company} +p26: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?v2 ?p12 ?y . + ?v0 rdf:type ?p10 . + ?v2 rdf:type ?p11 . + ?y ?p9 ?v1 . + FILTER(?c > 500000) . +} + +>> SLOTS: +p9: PROPERTY {} +p10: CLASS {employees,employee} +p11: CLASS {companies,company} +p12: PROPERTY {websites,website,site} +***************************************************************** +Which actors were born in Germany? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x rdf:type ?p0 . + ?x ?p1 ?y . +} + +>> SLOTS: +y: RESOURCE {Germany} +p0: CLASS {actors,actor,histrion,player,thespian} +p1: PROPERTY {born} +***************************************************************** +Which birds are there in the United States? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p1 ?v1 . + ?y rdf:type ?p0 . +} + +>> SLOTS: +v1: RESOURCE {United States} +p0: CLASS {birds,bird} +p1: PROPERTY {} +***************************************************************** +Give me all European Capitals! 
+>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p1 ?j . + ?y rdf:type ?p0 . +} + +>> SLOTS: +p0: CLASS {Capitals,capital} +p1: PROPERTY {} +***************************************************************** +When was DBpedia released? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?x ?p0 ?y . +} + +>> SLOTS: +x: RESOURCE {DBpedia} +p0: PROPERTY {releasedDate} +***************************************************************** +Which people were born in Heraklion? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x rdf:type ?p0 . + ?x ?p1 ?y . +} + +>> SLOTS: +y: RESOURCE {Heraklion} +p0: CLASS {people} +p1: PROPERTY {born} +***************************************************************** +Which caves have more than 3 entrances? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x rdf:type ?p0 . + ?x ?p1 ?y . + ?v0 rdf:type ?p2 . + FILTER(?c > 3) . +} + +>> SLOTS: +p0: CLASS {caves,cave} +p1: PROPERTY {have} +p2: CLASS {entrances,entrance,entranceway,entryway,entry} +***************************************************************** +Give me all films produced by Hal Roach. +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?v1 ?p0 ?y . +} + +>> SLOTS: +p0: PROPERTY {produced} +p1: CLASS {films,movie,film,picture,pic} +***************************************************************** +Which software has been published by Mean Hamster Software? 
+>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x rdf:type ?p1 . + ?y ?p0 ?x . +} + +>> SLOTS: +y: RESOURCE {Mean Hamster Software} +p0: PROPERTY {published} +p1: CLASS {software,package} +***************************************************************** +What languages are spoken in Estonia? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p0 . + ?y ?p1 ?v0 . +} + +>> SLOTS: +v0: RESOURCE {Estonia} +p0: CLASS {languages,language} +p1: PROPERTY {spoken} +***************************************************************** +Who owns Aldi? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x ?p0 ?y . +} + +>> SLOTS: +y: RESOURCE {Aldi} +p0: PROPERTY {owns} +***************************************************************** +Who is called Dana? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p0 'dana' . +} + +>> SLOTS: +p0: PROPERTY {title,name} +***************************************************************** +Which books were written by Danielle Steel? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x rdf:type ?p1 . + ?y ?p0 ?x . +} + +>> SLOTS: +y: RESOURCE {Danielle Steel} +p0: PROPERTY {written} +p1: CLASS {books,book} +***************************************************************** +Which companies are located in California, USA? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p0 ?v0 . 
+} + +>> SLOTS: +v0: RESOURCE {California USA} +p0: PROPERTY {located} +p1: CLASS {companies,company} +***************************************************************** +Which country has the most official languages? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?j ?x WHERE { + ?x rdf:type ?p0 . + ?y ?p1 ?j . + ?y rdf:type ?p2 . + ?x ?p3 ?y . +} +ORDER BY DESC(?j) +LIMIT 1 OFFSET 0 + +>> SLOTS: +p0: CLASS {country,state,nation,land,commonwealth} +p1: PROPERTY {} +p2: CLASS {languages,language} +p3: PROPERTY {has} +***************************************************************** +Who produced films starring Natalie Portman? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x ?p1 ?y . + ?y rdf:type ?p0 . + ?y ?p2 ?v1 . +} + +>> SLOTS: +v1: RESOURCE {Natalie Portman} +p0: CLASS {films,movie,film,picture,pic} +p1: PROPERTY {produced} +p2: PROPERTY {starring} +***************************************************************** +Give me all movies with Tom Cruise! +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p0 ?v1 . +} + +>> SLOTS: +p0: PROPERTY {} +p1: CLASS {movies,movie,film,picture,pic} +***************************************************************** +Give me all female German chancellors! +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p2 ?j . + ?y ?p0 ?v0 . +} + +>> SLOTS: +p0: PROPERTY {} +p1: CLASS {chancellors,Chancellor} +p2: PROPERTY {} +***************************************************************** +Give me all soccer clubs in the Premier League. 
+>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p0 ?v1 . +} + +>> SLOTS: +v1: RESOURCE {Premier League} +p0: PROPERTY {} +p1: CLASS {soccer clubs} +***************************************************************** +When was Capcom founded? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?x ?p0 ?y . +} + +>> SLOTS: +x: RESOURCE {Capcom} +p0: PROPERTY {foundedDate} +***************************************************************** +What is the highest mountain? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?j ?y WHERE { + ?y ?p1 ?j . + ?y rdf:type ?p0 . +} +ORDER BY DESC(?j) +LIMIT 1 OFFSET 0 + +>> SLOTS: +p0: CLASS {mountain,mount} +p1: PROPERTY {degree} Property changes on: trunk/components-ext/successful.txt ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-05-24 15:57:45
|
Revision: 2819 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2819&view=rev Author: christinaunger Date: 2011-05-24 15:57:37 +0000 (Tue, 24 May 2011) Log Message: ----------- [tbsl] included NER into ltag.parser.Preprocessor Modified Paths: -------------- trunk/components-ext/failed.txt trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java trunk/components-ext/successful.txt Modified: trunk/components-ext/failed.txt =================================================================== --- trunk/components-ext/failed.txt 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/failed.txt 2011-05-24 15:57:37 UTC (rev 2819) @@ -1,19 +1,16 @@ -Which presidents were born in 1945? -Who developed the video game World of Warcraft? -List all episodes of the first season of the HBO television series The Sopranos! -Which people have as their given name Jimmy? -Is there a video game called Battle Chess? -Which companies work in the aerospace industry as well as on nuclear reactor technology? -Which country does the Airedale Terrier come from? -Which cities have more than 2 million inhabitants? -Who was Tom Hanks married to? -Which capitals in Europe were host cities of the summer olympic games? -Who has been the 5th president of the United States of America? -Which music albums contain the song Last Christmas? -Which genre does DBpedia belong to? -In which programming language is GIMP written? 
-In which films did Julia Roberts as well as Richard Gere play? -Who wrote the book The pillars of the Earth? -How many films did Leonardo DiCaprio star in? -Which organizations were founded in 1950? -Is Natalie Portman an actress? +Who is the daughter of Bill Clinton married to? +Where did Abraham Lincoln die? +In which country does the Nile start? +Is proinsulin a protein? +Which classis does the Millepede belong to? +How tall is Claudia Schiffer? +Give me the capitals of all U.S. states. +Is Egypts largest city also its capital? +In which country is the Limerick Lake? +In which films directed by Garry Marshall was Julia Roberts starring? +Was U.S. president Jackson involved in a war? +Which state of the United States of America has the highest density? +Which countries in the European Union adopted the Euro? +Through which countries does the Yenisei river flow? +When was the Battle of Gettysburg? +What did Bruce Carver die from? Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -80,7 +80,7 @@ internalParse(parseGrammar.getDPInitTrees(), n); } - System.out.println("Constructed " + derivationTrees.size() + " derivation trees."); + System.out.println("Constructed " + derivationTrees.size() + " derivation trees.\n"); return derivationTrees; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-24 13:54:41 UTC (rev 2818) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -6,6 +6,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.dllearner.algorithm.tbsl.nlp.LingPipeNER; +import org.dllearner.algorithm.tbsl.nlp.NER; import org.dllearner.algorithm.tbsl.sem.util.Pair; public class Preprocessor { @@ -160,4 +162,49 @@ return flat; } + + public static String findNEs(String tagged,String untagged) { + + String out = tagged; + + NER ner = new LingPipeNER(); + List<String> namedentities = ner.getNamedEntitites(untagged); + List<String> usefulnamedentities = new ArrayList<String>(); + + System.out.println("Proposed NEs: " + namedentities); + + // keep only longest matches (e.g. keep 'World of Warcraft' and forget about 'Warcraft') + // containing at least one upper case letter (in order to filter out errors like 'software') + for (String s1 : namedentities) { + if (s1.matches(".*[A-Z].*")) { + boolean isLongestMatch = true; + for (String s2 : namedentities) { + if (!s2.equals(s1) && s2.contains(s1)) { + isLongestMatch = false; + } + } + if (isLongestMatch) { + usefulnamedentities.add(s1); + } + } + } + + System.out.println("Accepted NEs: " + usefulnamedentities); + + // replace POS tags accordingly + for (String ne : usefulnamedentities) { + String[] neparts = ne.split(" "); + Pattern p; Matcher m; + for (String nep : neparts) { + p = Pattern.compile("(\\s)?(" + nep + "/([A-Z]+))(\\s)?"); + m = p.matcher(out); + while (m.find()) { + out = out.replaceFirst(m.group(2),nep+"/NNP"); + } + } + } + + return out; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-24 13:54:41 UTC (rev 2818) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -66,6 +66,7 @@ slotX += next; slotP += next; slotC += next; if (i.hasNext()) { slotX += "^"; slotP += "^"; slotC += "^"; } } + // treetoken String treetoken = "N:'" + token.toLowerCase() + "'"; if (token.trim().contains(" ")) { String[] tokenParts = token.split(" "); @@ -75,17 +76,26 @@ } treetoken = treetoken.trim(); } + // if (pos.equals("NN") || pos.equals("NNS")) { /* DP */ - String[] dpEntry = {token, + String[] dpEntry1 = {token, "(DP (NP " + treetoken + "))", "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotP + "]>"}; - result.add(dpEntry); + String[] dpEntry2 = {token, + "(DP (NP " + treetoken + " DP[name]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slotP + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); /* NP */ - String[] npEntry = {token, + String[] npEntry1 = {token, "(NP " + treetoken + ")", "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotP + "]>"}; - result.add(npEntry); + String[] npEntry2 = {token, + "(NP " + treetoken + " DP[name])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slotP + "]>"}; + result.add(npEntry1); + result.add(npEntry2); } else if (pos.equals("NNP") || pos.equals("NNPS")) { /* DP */ @@ -215,7 +225,7 @@ /* ADJECTIVES */ else if (equalsOneOf(pos,adjective)) { - String slot = "SLOT_" + token + "/PROPERTY/"; + String slot = "SLOT_" + token + "/PROPERTY/" + token; List<String> preds = wordnet.getAttributes(token); for (Iterator<String> i = preds.iterator(); i.hasNext();) { slot += i.next(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -53,8 +53,8 @@ s = Preprocessor.normalize(s); String tagged = tagger.tag(s); System.out.println("Tagged input: " + tagged); - - String newtagged = Preprocessor.condenseNominals(tagged); + + String newtagged = Preprocessor.condenseNominals(Preprocessor.findNEs(tagged,s)); newtagged = Preprocessor.condense(newtagged); System.out.println("Preprocessed: " + newtagged); @@ -88,7 +88,12 @@ d2s.redundantEqualRenaming(drs); if (!containsModuloRenaming(drses,drs)) { - System.out.println(drs); // DEBUG +// // DEBUG +// System.out.println(drs); +// for (Slot sl : slots) { +// System.out.println(sl.toString()); +// } +// // drses.add(drs); try { Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/NERTest.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -12,20 +12,35 @@ * @param args */ public static void main(String[] args) { - String sentence = "Which software company produced World of Warcraft?"; + + List<String> namedEntities; - NER ner = new DBpediaSpotlightNER(); - long startTime = System.currentTimeMillis(); - List<String> namedEntities = ner.getNamedEntitites(sentence); + NER dbpner = new DBpediaSpotlightNER(); + NER lpner = new LingPipeNER(); + + String sentence1 = "Which software company produced World of Warcraft?"; + + long startTime = System.currentTimeMillis(); + namedEntities = dbpner.getNamedEntitites(sentence1); System.out.format("Using DBpedia Spotlight WebService (%d ms):\n", System.currentTimeMillis()-startTime); 
System.out.println(namedEntities + "\n"); - ner = new LingPipeNER(); startTime = System.currentTimeMillis(); - namedEntities = ner.getNamedEntitites(sentence); + namedEntities = lpner.getNamedEntitites(sentence1); System.out.format("Using Lingpipe API with local DBpedia dictionary (%d ms):\n", System.currentTimeMillis()-startTime); System.out.println(namedEntities); + String sentence2 = "Give me all actors of the television series Charmed and did Nirvana record Nevermind?"; + + startTime = System.currentTimeMillis(); + namedEntities = dbpner.getNamedEntitites(sentence2); + System.out.format("Using DBpedia Spotlight WebService (%d ms):\n", System.currentTimeMillis()-startTime); + System.out.println(namedEntities + "\n"); + + startTime = System.currentTimeMillis(); + namedEntities = lpner.getNamedEntitites(sentence2); + System.out.format("Using Lingpipe API with local DBpedia dictionary (%d ms):\n", System.currentTimeMillis()-startTime); + System.out.println(namedEntities); } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -11,13 +11,14 @@ public static void main(String[] args) { // TODO Auto-generated method stub - String s = "New/NNP York/NNP City/NNP is/VBZ a/DT US/NNP state/NN"; + String nep = "World"; + String s = "Who/WP developed/VBD the/DT video/NN game/NN World/NN of/IN Warcraft/NNP"; - Pattern nprepPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); - Matcher m = nprepPattern.matcher(s); + Pattern p = Pattern.compile("(\\s)?(" + nep + "/([A-Z]+))(\\s)?"); + Matcher m = p.matcher(s); while (m.find()) { - System.out.println("Found!"); - s = 
s.replaceFirst(m.group(1),m.group(2) + "/JJ"); + System.out.println("Found! " + m.group(2)); + s = s.replaceFirst(m.group(2),nep+"/NNP"); } System.out.println(s); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-24 15:57:37 UTC (rev 2819) @@ -59,7 +59,7 @@ * @param args */ public static void main(String[] args) { - File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-test-questions.xml"); List<String> questions = readQuestions(file); StringBuilder successful = new StringBuilder(); Modified: trunk/components-ext/successful.txt =================================================================== --- trunk/components-ext/successful.txt 2011-05-24 13:54:41 UTC (rev 2818) +++ trunk/components-ext/successful.txt 2011-05-24 15:57:37 UTC (rev 2819) @@ -1,197 +1,263 @@ ***************************************************************** -Give me all school types. +Which presidents of the United States had more than three children? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y rdf:type ?p0 . +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x ?p6 ?v1 . + ?x ?p5 ?y . + ?v0 rdf:type ?p4 . + ?x rdf:type ?p3 . + FILTER(?c > 3) . 
} >> SLOTS: -p0: CLASS {school types} +v1: RESOURCE {United States} +p3: CLASS {presidents,president} +p4: CLASS {children,child,kid,youngster,minor} +p5: PROPERTY {had} +p6: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x ?p2 ?y . + ?v1 ?p0 ?x . + ?v0 rdf:type ?p1 . + FILTER(?c > 3) . +} + +>> SLOTS: +v1: RESOURCE {United States} +p0: PROPERTY {presidents,president} +p1: CLASS {children,child,kid,youngster,minor} +p2: PROPERTY {had} ***************************************************************** -Who are the presidents of the United States? +Give me the official websites of actors of the television show Charmed. >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p2 ?v0 . + ?y ?p0 ?j . + ?v0 ?p2 ?v3 . + ?v3 ?p3 ?y . + ?v0 rdf:type ?p1 . } >> SLOTS: -v0: RESOURCE {United States} -p1: CLASS {presidents,president} -p2: PROPERTY {} +v0: RESOURCE {Charmed} +p0: PROPERTY {official} +p1: CLASS {television show} +p2: PROPERTY {actors,actor,histrion,player,thespian} +p3: PROPERTY {websites,website,site} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?v0 ?p0 ?y . + ?y ?p9 ?j . + ?v0 rdf:type ?p11 . + ?v0 ?p12 ?v3 . + ?y rdf:type ?p13 . + ?y ?p10 ?v3 . } >> SLOTS: -v0: RESOURCE {United States} -p0: PROPERTY {presidents,president} -***************************************************************** -Who was the wife of President Lincoln? 
+v0: RESOURCE {Charmed} +p9: PROPERTY {official} +p10: PROPERTY {} +p11: CLASS {television show} +p12: PROPERTY {actors,actor,histrion,player,thespian} +p13: CLASS {websites,website,site} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p2 ?v0 . + ?v0 rdf:type ?p16 . + ?y rdf:type ?p18 . + ?y ?p15 ?j . + ?v3 rdf:type ?p17 . + ?v3 ?p14 ?v0 . + ?y ?p14 ?v3 . } >> SLOTS: -v0: RESOURCE {President Lincoln} -p1: CLASS {wife} -p2: PROPERTY {} +v0: RESOURCE {Charmed} +p14: PROPERTY {} +p15: PROPERTY {official} +p16: CLASS {television show} +p17: CLASS {actors,actor,histrion,player,thespian} +p18: CLASS {websites,website,site} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?v0 ?p0 ?y . + ?v3 ?p7 ?y . + ?y ?p5 ?j . + ?v3 ?p4 ?v0 . + ?v3 rdf:type ?p8 . + ?v0 rdf:type ?p6 . } >> SLOTS: -v0: RESOURCE {President Lincoln} -p0: PROPERTY {wife} +v0: RESOURCE {Charmed} +p4: PROPERTY {} +p5: PROPERTY {official} +p6: CLASS {television show} +p7: PROPERTY {websites,website,site} +p8: CLASS {actors,actor,histrion,player,thespian} +***************************************************************** +Which river does the Brooklyn Bridge cross? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y ?p5 ?v0 . - ?y rdf:type ?p4 . +SELECT ?v0 WHERE { + ?v0 rdf:type ?p0 . + ?v0 ?p1 ?j . + ?v0 rdf:type ?p2 . } >> SLOTS: -v0: RESOURCE {President Lincoln} -p4: CLASS {wife} -p5: PROPERTY {} +p0: CLASS {river} +p1: PROPERTY {Brooklyn Bridge} +p2: CLASS {cross} +***************************************************************** +How many monarchical countries are there in Europe? 
>> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?v0 ?p3 ?y . +SELECT COUNT(?x) WHERE { + ?y rdf:type ?p1 . + ?y ?p0 ?v1 . + ?y ?p2 ?j . + FILTER(?y == ?y) . } >> SLOTS: -v0: RESOURCE {President Lincoln} -p3: PROPERTY {wife} +v1: RESOURCE {Europe} +p0: PROPERTY {} +p1: CLASS {countries,state,nation,country,land} +p2: PROPERTY {monarchical} ***************************************************************** -What is the official website of Tom Hanks? +Is the wife of President Obama called Michelle? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y ?p9 ?v0 . - ?y ?p8 ?j . - ?y rdf:type ?p7 . +ASK WHERE { + ?y rdf:type ?p2 . + ?y ?p4 'michelle' . + ?y ?p3 ?v0 . } >> SLOTS: -v0: RESOURCE {Tom Hanks} -p7: CLASS {website,site} -p8: PROPERTY {} -p9: PROPERTY {} +v0: RESOURCE {President Obama} +p2: CLASS {wife} +p3: PROPERTY {} +p4: PROPERTY {title,name} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?v1 ?p1 ?y . - ?y ?p0 ?j . +ASK WHERE { + ?v0 ?p1 ?y . + ?y ?p0 'michelle' . } >> SLOTS: -v1: RESOURCE {Tom Hanks} -p0: PROPERTY {} -p1: PROPERTY {website,site} +v0: RESOURCE {President Obama} +p0: PROPERTY {title,name} +p1: PROPERTY {wife} +***************************************************************** +Which states of Germany are governed by the Social Democratic Party? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y rdf:type ?p2 . - ?y ?p4 ?j . - ?y ?p3 ?v1 . +SELECT ?x WHERE { + ?x ?p4 ?v0 . + ?x rdf:type ?p3 . + ?y ?p2 ?x . 
} >> SLOTS: -v1: RESOURCE {Tom Hanks} -p2: CLASS {website,site} -p3: PROPERTY {} +y: RESOURCE {Social Democratic Party} +v0: RESOURCE {Germany} +p2: PROPERTY {governed} +p3: CLASS {states,state,province} p4: PROPERTY {} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y ?p6 ?j . - ?v0 ?p5 ?y . +SELECT ?x WHERE { + ?v0 ?p1 ?x . + ?y ?p0 ?x . } >> SLOTS: -v0: RESOURCE {Tom Hanks} -p5: PROPERTY {website,site} -p6: PROPERTY {} +y: RESOURCE {Social Democratic Party} +v0: RESOURCE {Germany} +p0: PROPERTY {governed} +p1: PROPERTY {states,state,province} ***************************************************************** -Who produced the most films? +Which U.S. states possess gold minerals? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT COUNT(?y) AS c2 WHERE { - ?x ?p1 ?y . - ?y rdf:type ?p0 . +SELECT ?x WHERE { + ?x rdf:type ?p2 . + ?x ?p1 ?j . + ?y rdf:type ?p3 . + ?x ?p0 ?y . } -ORDER BY DESC(?c2) -LIMIT 1 OFFSET 0 >> SLOTS: -p0: CLASS {films,movie,film,picture,pic} -p1: PROPERTY {produced} +p0: PROPERTY {possess} +p1: PROPERTY {US} +p2: CLASS {states,state,province} +p3: CLASS {gold minerals} ***************************************************************** -Which mountains are higher than the Nanga Parbat? +Which locations have more than two caves? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?y ?p1 ?j . - ?v0 ?p1 ?i . - ?y rdf:type ?p0 . - FILTER(?j > ?i) . +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x rdf:type ?p2 . + ?x ?p0 ?y . + ?v0 rdf:type ?p1 . + FILTER(?c > 2) . 
} >> SLOTS: -v0: RESOURCE {Nanga Parbat} -p0: CLASS {mountains,mountain,mount} -p1: PROPERTY {degree} +p0: PROPERTY {have} +p1: CLASS {caves,cave} +p2: CLASS {locations,location} ***************************************************************** -Who created English Wikipedia? +Who created Goofy? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -202,25 +268,49 @@ } >> SLOTS: -y: RESOURCE {English Wikipedia} +y: RESOURCE {Goofy} p0: PROPERTY {created} ***************************************************************** -Give me all actors starring in Batman Begins. +Give me all cities in New Jersey with more than 100000 inhabitants. >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { +SELECT ?y COUNT(?v0) AS ?c WHERE { + ?y rdf:type ?p6 . + ?v0 rdf:type ?p4 . + ?y ?p7 ?v3 . + ?v3 ?p5 ?v2 . + FILTER(?c > 100000) . + FILTER(?100000 == 100000) . +} + +>> SLOTS: +p4: CLASS {inhabitants,inhabitant,habitant,dweller,denizen} +p5: PROPERTY {} +p6: CLASS {cities,city,metropolis} +p7: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y COUNT(?v0) AS ?c WHERE { ?y ?p1 ?v1 . - ?y rdf:type ?p0 . + ?v0 rdf:type ?p0 . + ?y rdf:type ?p2 . + ?y ?p3 ?v3 . + FILTER(?c > 100000) . } >> SLOTS: -p0: CLASS {actors,actor,histrion,player,thespian} -p1: PROPERTY {starring} +p0: CLASS {inhabitants,inhabitant,habitant,dweller,denizen} +p1: PROPERTY {} +p2: CLASS {cities,city,metropolis} +p3: PROPERTY {} ***************************************************************** -Which software has been developed by organizations founded in California? +Which museum exhibits The Scream by Munch? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -229,372 +319,390 @@ SELECT ?x WHERE { ?y ?p0 ?v1 . ?x rdf:type ?p2 . - ?y ?p1 ?x . - ?y rdf:type ?p3 . + ?x ?p1 ?y . 
} >> SLOTS: -v1: RESOURCE {California} -p0: PROPERTY {founded} -p1: PROPERTY {developed} -p2: CLASS {software,package} -p3: CLASS {organizations,organization,organisation} +y: RESOURCE {Scream} +v1: RESOURCE {Munch} +p0: PROPERTY {} +p1: PROPERTY {exhibits} +p2: CLASS {museum} ***************************************************************** -Is Christian Bale starring in Batman Begins? +What is the revenue of IBM? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -ASK WHERE { - ?y ?p0 ?v0 . +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . } >> SLOTS: -y: RESOURCE {Christian Bale} -v0: RESOURCE {Batman Begins} -p0: PROPERTY {starring} -***************************************************************** -Give me the websites of companies with more than 500000 employees. +v0: RESOURCE {IBM} +p1: CLASS {revenue,gross,receipts} +p2: PROPERTY {} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v1 rdf:type ?p34 . - ?y ?p31 ?v1 . - ?v0 rdf:type ?p32 . - ?y rdf:type ?p35 . - ?v1 ?p33 ?v2 . - FILTER(?c > 500000) . +SELECT ?y WHERE { + ?v0 ?p0 ?y . } >> SLOTS: -p31: PROPERTY {} -p32: CLASS {employees,employee} -p33: PROPERTY {} -p34: CLASS {companies,company} -p35: CLASS {websites,website,site} +v0: RESOURCE {IBM} +p0: PROPERTY {revenue,gross,receipts} +***************************************************************** +Which states border Utah? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?y ?p13 ?v2 . - ?y rdf:type ?p17 . - ?y ?p14 ?v1 . - ?v2 rdf:type ?p16 . - ?v0 rdf:type ?p15 . - FILTER(?c > 500000) . +SELECT ?v0 WHERE { + ?v0 rdf:type ?p0 . + ?v0 rdf:type ?p1 . + FILTER(?v0 == ?v0) . 
} >> SLOTS: -p13: PROPERTY {} -p14: PROPERTY {} -p15: CLASS {employees,employee} -p16: CLASS {companies,company} -p17: CLASS {websites,website,site} +p0: CLASS {states,state,province} +p1: CLASS {border,borderline,delimitation,mete} +***************************************************************** +Which television shows were created by Walt Disney? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?y ?p6 ?v3 . - ?y ?p4 ?v1 . - ?y rdf:type ?p8 . - ?v0 rdf:type ?p5 . - ?v3 rdf:type ?p7 . - FILTER(?c > 500000) . +SELECT ?x WHERE { + ?x rdf:type ?p1 . + ?y ?p0 ?x . } >> SLOTS: -p4: PROPERTY {} -p5: CLASS {employees,employee} -p6: PROPERTY {} -p7: CLASS {companies,company} -p8: CLASS {websites,website,site} +y: RESOURCE {Walt Disney} +p0: PROPERTY {created} +p1: CLASS {television shows} +***************************************************************** +Which mountain is the highest after the Annapurna? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v1 ?p29 ?v2 . - ?v0 rdf:type ?p27 . - ?v1 ?p28 ?y . - ?v1 rdf:type ?p30 . - FILTER(?c > 500000) . +SELECT ?j ?y WHERE { + ?y ?p1 ?v1 . + ?y ?p2 ?j . + ?y rdf:type ?p0 . } +ORDER BY DESC(?j) +LIMIT 1 OFFSET 0 >> SLOTS: -p27: CLASS {employees,employee} -p28: PROPERTY {websites,website,site} -p29: PROPERTY {} -p30: CLASS {companies,company} +v1: RESOURCE {Annapurna} +p0: CLASS {mountain,mount} +p1: PROPERTY {} +p2: PROPERTY {highestdegree} +***************************************************************** +Which bridges are of the same type as the Manhattan Bridge? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?y ?p0 ?v1 . - ?v3 ?p3 ?y . - ?v3 rdf:type ?p2 . - ?v0 rdf:type ?p1 . 
- FILTER(?c > 500000) . +SELECT ?y WHERE { + ?y ?p3 ?j . + ?y rdf:type ?p2 . + ?y rdf:type ?p0 . + ?y ?p1 ?v0 . } >> SLOTS: -p0: PROPERTY {} -p1: CLASS {employees,employee} -p2: CLASS {companies,company} -p3: PROPERTY {websites,website,site} +v0: RESOURCE {Manhattan Bridge} +p0: CLASS {bridges,Bridges} +p1: PROPERTY {} +p2: CLASS {type} +p3: PROPERTY {same} +***************************************************************** +Which European countries are a constitutional monarchy? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v1 ?p20 ?v2 . - ?v1 rdf:type ?p21 . - ?v0 rdf:type ?p18 . - ?v1 ?p19 ?y . - FILTER(?c > 500000) . +ASK WHERE { + ?y ?p1 ?j . + ?y ?p2 ?v0 . + ?y rdf:type ?p0 . + ?y rdf:type ?p3 . + FILTER(?y == ?y) . } >> SLOTS: -p18: CLASS {employees,employee} -p19: PROPERTY {websites,website,site} -p20: PROPERTY {} -p21: CLASS {companies,company} +p0: CLASS {monarchy} +p1: PROPERTY {constitutional} +p2: PROPERTY {European} +p3: CLASS {countries,state,nation,country,land} +***************************************************************** +Who is the author of WikiLeaks? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v1 rdf:type ?p25 . - ?v1 ?p24 ?v2 . - ?y rdf:type ?p26 . - ?y ?p22 ?v1 . - ?v0 rdf:type ?p23 . - FILTER(?c > 500000) . +SELECT ?y WHERE { + ?v0 rdf:type ?p2 . + ?y rdf:type ?p4 . + ?y ?p3 ?v0 . } >> SLOTS: -p22: PROPERTY {} -p23: CLASS {employees,employee} -p24: PROPERTY {} -p25: CLASS {companies,company} -p26: CLASS {websites,website,site} +p2: CLASS {WikiLeaks} +p3: PROPERTY {} +p4: CLASS {author,writer} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v2 ?p12 ?y . - ?v0 rdf:type ?p10 . - ?v2 rdf:type ?p11 . 
- ?y ?p9 ?v1 . - FILTER(?c > 500000) . +SELECT ?y WHERE { + ?v0 rdf:type ?p0 . + ?v0 ?p1 ?y . } >> SLOTS: -p9: PROPERTY {} -p10: CLASS {employees,employee} -p11: CLASS {companies,company} -p12: PROPERTY {websites,website,site} +p0: CLASS {WikiLeaks} +p1: PROPERTY {author,writer} ***************************************************************** -Which actors were born in Germany? +What is the currency of the Czech Republic? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x WHERE { - ?x rdf:type ?p0 . - ?x ?p1 ?y . +SELECT ?y WHERE { + ?v0 ?p0 ?y . } >> SLOTS: -y: RESOURCE {Germany} -p0: CLASS {actors,actor,histrion,player,thespian} -p1: PROPERTY {born} -***************************************************************** -Which birds are there in the United States? +v0: RESOURCE {Czech Republic} +p0: PROPERTY {currency} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y ?p1 ?v1 . - ?y rdf:type ?p0 . + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . } >> SLOTS: -v1: RESOURCE {United States} -p0: CLASS {birds,bird} -p1: PROPERTY {} +v0: RESOURCE {Czech Republic} +p1: CLASS {currency} +p2: PROPERTY {} ***************************************************************** -Give me all European Capitals! +What is the area code of Berlin? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y ?p1 ?j . - ?y rdf:type ?p0 . + ?v0 ?p0 ?y . } >> SLOTS: -p0: CLASS {Capitals,capital} -p1: PROPERTY {} -***************************************************************** -When was DBpedia released? +v0: RESOURCE {Berlin} +p0: PROPERTY {area code} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?x ?p0 ?y . + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . 
} >> SLOTS: -x: RESOURCE {DBpedia} -p0: PROPERTY {releasedDate} +v0: RESOURCE {Berlin} +p1: CLASS {area code} +p2: PROPERTY {} ***************************************************************** -Which people were born in Heraklion? +Which countries have more than two official languages? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x WHERE { - ?x rdf:type ?p0 . +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?v0 ?p0 ?j . ?x ?p1 ?y . + ?x rdf:type ?p3 . + ?v0 rdf:type ?p2 . + FILTER(?c > 2) . } >> SLOTS: -y: RESOURCE {Heraklion} -p0: CLASS {people} -p1: PROPERTY {born} +p0: PROPERTY {official} +p1: PROPERTY {have} +p2: CLASS {languages,language} +p3: CLASS {countries,state,nation,country,land} ***************************************************************** -Which caves have more than 3 entrances? +Who is the owner of Universal Studios? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x COUNT(?v0) AS ?c WHERE { - ?x rdf:type ?p0 . - ?x ?p1 ?y . - ?v0 rdf:type ?p2 . - FILTER(?c > 3) . +SELECT ?y WHERE { + ?y rdf:type ?p2 . + ?y ?p1 ?v0 . } >> SLOTS: -p0: CLASS {caves,cave} -p1: PROPERTY {have} -p2: CLASS {entrances,entrance,entranceway,entryway,entry} -***************************************************************** -Give me all films produced by Hal Roach. +v0: RESOURCE {Universal Studios} +p1: PROPERTY {} +p2: CLASS {owner,proprietor} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?v1 ?p0 ?y . + ?v0 ?p0 ?y . } >> SLOTS: -p0: PROPERTY {produced} -p1: CLASS {films,movie,film,picture,pic} +v0: RESOURCE {Universal Studios} +p0: PROPERTY {owner,proprietor} ***************************************************************** -Which software has been published by Mean Hamster Software? 
+When did Germany join the EU? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +SELECT ?z WHERE { + ?x ?p1 ?z . + ?x ?p0 ?y . +} + +>> SLOTS: +y: RESOURCE {EU} +x: RESOURCE {Germany} +p0: PROPERTY {join} +p1: PROPERTY {date} +***************************************************************** +Which monarchs of the United Kingdom were married to a German? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + SELECT ?x WHERE { - ?x rdf:type ?p1 . - ?y ?p0 ?x . + ?v0 ?p1 ?x . + ?x ?p0 ?y . } >> SLOTS: -y: RESOURCE {Mean Hamster Software} -p0: PROPERTY {published} -p1: CLASS {software,package} +y: RESOURCE {German} +v0: RESOURCE {United Kingdom} +p0: PROPERTY {married} +p1: PROPERTY {monarchs,sovereign,monarch} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { + ?x ?p4 ?v0 . + ?x ?p2 ?y . + ?x rdf:type ?p3 . +} + +>> SLOTS: +y: RESOURCE {German} +v0: RESOURCE {United Kingdom} +p2: PROPERTY {married} +p3: CLASS {monarchs,sovereign,monarch} +p4: PROPERTY {} ***************************************************************** -What languages are spoken in Estonia? +What is the highest mountain in Germany? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { +SELECT ?j ?y WHERE { + ?y ?p1 ?v1 . + ?y ?p2 ?j . ?y rdf:type ?p0 . - ?y ?p1 ?v0 . } +ORDER BY DESC(?j) +LIMIT 1 OFFSET 0 >> SLOTS: -v0: RESOURCE {Estonia} -p0: CLASS {languages,language} -p1: PROPERTY {spoken} +v1: RESOURCE {Germany} +p0: CLASS {mountain,mount} +p1: PROPERTY {} +p2: PROPERTY {highestdegree} ***************************************************************** -Who owns Aldi? +Give me all soccer clubs in Spain. 
>> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x WHERE { - ?x ?p0 ?y . +SELECT ?y WHERE { + ?y ?p1 ?v1 . + ?y rdf:type ?p0 . } >> SLOTS: -y: RESOURCE {Aldi} -p0: PROPERTY {owns} +v1: RESOURCE {Spain} +p0: CLASS {soccer clubs} +p1: PROPERTY {} ***************************************************************** -Who is called Dana? +What are the official languages of the Philippines? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y ?p0 'dana' . + ?v0 ?p1 ?y . + ?y ?p0 ?j . } >> SLOTS: -p0: PROPERTY {title,name} -***************************************************************** -Which books were written by Danielle Steel? +v0: RESOURCE {Philippines} +p0: PROPERTY {official} +p1: PROPERTY {languages,language} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x WHERE { - ?x rdf:type ?p1 . - ?y ?p0 ?x . +SELECT ?y WHERE { + ?y ?p4 ?v0 . + ?y ?p2 ?j . + ?y rdf:type ?p3 . } >> SLOTS: -y: RESOURCE {Danielle Steel} -p0: PROPERTY {written} -p1: CLASS {books,book} +v0: RESOURCE {Philippines} +p2: PROPERTY {official} +p3: CLASS {languages,language} +p4: PROPERTY {} ***************************************************************** -Which companies are located in California, USA? +Who is the mayor of New York City? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -602,69 +710,57 @@ SELECT ?y WHERE { ?y rdf:type ?p1 . - ?y ?p0 ?v0 . + ?y ?p2 ?v0 . } >> SLOTS: -v0: RESOURCE {California USA} -p0: PROPERTY {located} -p1: CLASS {companies,company} -***************************************************************** -Which country has the most official languages? 
+v0: RESOURCE {New York City} +p1: CLASS {mayor} +p2: PROPERTY {} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?j ?x WHERE { - ?x rdf:type ?p0 . - ?y ?p1 ?j . - ?y rdf:type ?p2 . - ?x ?p3 ?y . +SELECT ?y WHERE { + ?v0 ?p0 ?y . } -ORDER BY DESC(?j) -LIMIT 1 OFFSET 0 >> SLOTS: -p0: CLASS {country,state,nation,land,commonwealth} -p1: PROPERTY {} -p2: CLASS {languages,language} -p3: PROPERTY {has} +v0: RESOURCE {New York City} +p0: PROPERTY {mayor} ***************************************************************** -Who produced films starring Natalie Portman? +Who designed the Brooklyn Bridge? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?x WHERE { - ?x ?p1 ?y . - ?y rdf:type ?p0 . - ?y ?p2 ?v1 . + ?x ?p0 ?y . } >> SLOTS: -v1: RESOURCE {Natalie Portman} -p0: CLASS {films,movie,film,picture,pic} -p1: PROPERTY {produced} -p2: PROPERTY {starring} +y: RESOURCE {Brooklyn Bridge} +p0: PROPERTY {designed} ***************************************************************** -Give me all movies with Tom Cruise! +Which telecommunications organizations are located in Belgium? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p0 ?v1 . + ?y rdf:type ?p0 . + ?y ?p1 ?v0 . } >> SLOTS: -p0: PROPERTY {} -p1: CLASS {movies,movie,film,picture,pic} +v0: RESOURCE {Belgium} +p0: CLASS {telecommunications organizations} +p1: PROPERTY {located} ***************************************************************** -Give me all female German chancellors! +What is the profession of Frank Herbert? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -672,58 +768,104 @@ SELECT ?y WHERE { ?y rdf:type ?p1 . - ?y ?p2 ?j . - ?y ?p0 ?v0 . + ?y ?p2 ?v0 . 
} >> SLOTS: -p0: PROPERTY {} -p1: CLASS {chancellors,Chancellor} +v0: RESOURCE {Frank Herbert} +p1: CLASS {profession} p2: PROPERTY {} -***************************************************************** -Give me all soccer clubs in the Premier League. >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p0 ?v1 . + ?v0 ?p0 ?y . } >> SLOTS: -v1: RESOURCE {Premier League} -p0: PROPERTY {} -p1: CLASS {soccer clubs} +v0: RESOURCE {Frank Herbert} +p0: PROPERTY {profession} ***************************************************************** -When was Capcom founded? +What is the highest place of Karakoram? >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y WHERE { - ?x ?p0 ?y . +SELECT ?j ?y WHERE { + ?y ?p2 ?v0 . + ?y ?p4 ?j . + ?y rdf:type ?p3 . } +ORDER BY DESC(?j) +LIMIT 1 OFFSET 0 >> SLOTS: -x: RESOURCE {Capcom} -p0: PROPERTY {foundedDate} -***************************************************************** -What is the highest mountain? +v0: RESOURCE {Karakoram} +p2: PROPERTY {} +p3: CLASS {place,spot} +p4: PROPERTY {highestdegree} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?j ?y WHERE { + ?v0 ?p0 ?y . ?y ?p1 ?j . - ?y rdf:type ?p0 . } ORDER BY DESC(?j) LIMIT 1 OFFSET 0 >> SLOTS: -p0: CLASS {mountain,mount} -p1: PROPERTY {degree} +v0: RESOURCE {Karakoram} +p0: PROPERTY {place,spot} +p1: PROPERTY {highestdegree} +***************************************************************** +Give me the homepage of Forbes. +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v0 ?p0 ?y . 
+} + +>> SLOTS: +v0: RESOURCE {Forbes} +p0: PROPERTY {homepage} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y rdf:type ?p1 . + ?y ?p2 ?v0 . +} + +>> SLOTS: +v0: RESOURCE {Forbes} +p1: CLASS {homepage} +p2: PROPERTY {} +***************************************************************** +Which companies are in the computer software industry? +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?v0 WHERE { + ?v0 rdf:type ?p0 . + ?v0 rdf:type ?p2 . + ?v0 rdf:type ?p1 . + FILTER(?v0 == ?v0) . +} + +>> SLOTS: +p0: CLASS {industry} +p1: CLASS {computer software,software,package} +p2: CLASS {companies,company} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-05-25 16:02:06
|
Revision: 2822 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2822&view=rev Author: christinaunger Date: 2011-05-25 16:01:55 +0000 (Wed, 25 May 2011) Log Message: ----------- [tbsl] integrated manually corrected tagged input Modified Paths: -------------- trunk/components-ext/failed.txt trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java trunk/components-ext/successful.txt Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java Modified: trunk/components-ext/failed.txt =================================================================== --- trunk/components-ext/failed.txt 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/failed.txt 2011-05-25 16:01:55 UTC (rev 2822) @@ -1,16 +1,10 @@ -Who is the daughter of Bill Clinton married to? -Where did Abraham Lincoln die? -In which country does the Nile start? -Is proinsulin a protein? -Which classis does the Millepede belong to? 
-How tall is Claudia Schiffer? -Give me the capitals of all U.S. states. -Is Egypts largest city also its capital? -In which country is the Limerick Lake? -In which films directed by Garry Marshall was Julia Roberts starring? -Was U.S. president Jackson involved in a war? -Which state of the United States of America has the highest density? -Which countries in the European Union adopted the Euro? -Through which countries does the Yenisei river flow? -When was the Battle of Gettysburg? -What did Bruce Carver die from? +Which/WDT presidents/NNS were/VBD born/VBN in/IN 1945/CD +List/VB all/DT episodes/NNS of/IN the/DT first/JJ season/NN of/IN the/DT HBO/NNP television/NN series/NN The/DT Sopranos/NNPS +Which/WDT people/NNS have/VBP as/IN their/PRP$ given/VBN name/NN Jimmy/NNP +Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP +Which/WDT companies/NNS work/VBP in/IN the/DT aerospace/NN industry/NN as/RB well/RB as/IN on/IN nuclear/JJ reactor/NN technology/NN +Which/WDT cities/NNS have/VBP more/JJR than/IN 2/CD million/CD inhabitants/NNS +Who/WP has/VBZ been/VBN the/DT 5th/JJ president/NN of/IN the/DT United/NNP States/NNPS of/IN America/NNP +In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/VB +Who/WP wrote/VBD the/DT book/NN The/DT pillars/NNS of/IN the/DT Earth/NNP +Which/WDT organizations/NNS were/VBD founded/VBN in/IN 1950/CD Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-25 16:01:55 UTC (rev 2822) @@ -49,22 +49,25 @@ String condensedstring = taggedstring; Matcher m; - Pattern compAdjPattern = 
Pattern.compile("\\s(\\w+/RBR.([a-zA-Z_0-9]+)/JJ)"); - Pattern superAdjPattern = Pattern.compile("\\s(\\w+/RBS.([a-zA-Z_0-9]+)/JJ)"); - Pattern howAdjPattern = Pattern.compile("\\s(\\w+/WRB.([a-zA-Z_0-9]+)/JJ)"); - Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NNS?.of/IN)"); - Pattern didPattern = Pattern.compile("(?i)(\\s((did)|(do)|(does))/VB.?)\\s"); - Pattern passivePattern1a = Pattern.compile("(((has)|(have)|(had))/VB[A-Z]?.been/VBN.(\\w+)/VBN.by/IN)"); - Pattern passivePattern1b = Pattern.compile("(\\s((has)|(have)|(had))/VB[A-Z]?(.+\\s)been/VBN\\s(\\w+)/VB(N|D))"); - Pattern passivePattern2a = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)"); - Pattern passivePattern2b = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)(\\s\\w+)/VB(N|D))"); - Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)"); - Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)"); - Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)"); - Pattern gerundinPattern = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)"); - Pattern vprepPattern = Pattern.compile("\\s((\\w+)/V[A-Z]+\\s\\w+/(IN|TO))"); - Pattern whenPattern = Pattern.compile("(?i)(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); - Pattern wherePattern = Pattern.compile("(?i)(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); + Pattern compAdjPattern = Pattern.compile("(\\w+/RBR.(\\w+)/JJ)"); + Pattern superAdjPattern = Pattern.compile("(\\w+/RBS.(\\w+)/JJ)"); + Pattern howAdjPattern = Pattern.compile("(\\w+/WRB.(\\w+)/JJ)"); + Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NNS?.of/IN)"); + Pattern didPattern = Pattern.compile("(?i)(\\s((did)|(do)|(does))/VB.?)\\s"); + Pattern prepfrontPattern = Pattern.compile("(\\A\\w+/((TO)|(IN)).)\\w+/WDT"); // TODO (Nicht ganz sauber. Bei P-Stranding immer zwei Querys, hier nur eine.) 
+ Pattern passivePattern1a = Pattern.compile("(((has)|(have)|(had))/VB[A-Z]?.been/VBN.(\\w+)/VBN.by/IN)"); + Pattern passivePattern1b = Pattern.compile("(\\s((has)|(have)|(had))/VB[A-Z]?(.+\\s)been/VBN\\s(\\w+)/VB(N|D))"); + Pattern passivePattern2a = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)"); + Pattern pseudopassPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.\\w+/TO)"); + Pattern pseudopwhPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)\\s(\\w+)/VBN.\\w+/TO)"); + Pattern passivePattern2b = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)(\\s\\w+)/VB(N|D))"); + Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)"); + Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)"); + Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)"); + Pattern gerundinPattern = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)"); + Pattern vprepPattern = Pattern.compile("\\s((\\w+)/V[A-Z]+\\s\\w+/(IN|TO))"); + Pattern whenPattern = Pattern.compile("(?i)(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); + Pattern wherePattern = Pattern.compile("(?i)(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); m = compAdjPattern.matcher(condensedstring); while (m.find()) { @@ -86,6 +89,10 @@ while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),""); } + m = prepfrontPattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),""); + } m = passivePattern1a.matcher(condensedstring); while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE"); @@ -98,6 +105,14 @@ while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } + m = pseudopassPattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/VPREP"); + } + m = 
pseudopwhPattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+" "+m.group(8)+"/VPREP"); + } m = passivePattern2b.matcher(condensedstring); while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7) + m.group(8)+"/PASSIVE"); @@ -167,6 +182,10 @@ String out = tagged; + String[] postags = {"NN","NNS","NNP","NNPS","NPREP","JJ","JJR","JJS","JJH", + "VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN", + "GERUNDIN","VPREP","WHEN","WHERE","IN","TO","DT"}; + NER ner = new LingPipeNER(); List<String> namedentities = ner.getNamedEntitites(untagged); List<String> usefulnamedentities = new ArrayList<String>(); @@ -176,7 +195,7 @@ // keep only longest matches (e.g. keep 'World of Warcraft' and forget about 'Warcraft') // containing at least one upper case letter (in order to filter out errors like 'software') for (String s1 : namedentities) { - if (s1.matches(".*[A-Z].*")) { + if (s1.matches(".*[A-Z].*") && !Arrays.asList(postags).contains(s1)) { boolean isLongestMatch = true; for (String s2 : namedentities) { if (!s2.equals(s1) && s2.contains(s1)) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java 2011-05-25 16:01:55 UTC (rev 2822) @@ -177,7 +177,7 @@ jj_consume_token(-1); throw new ParseException(); } - dr = jj_consume_token(WORD); + dr = dr(); drs2 = DRS(); Complex_DRS_Condition drs; drs = new Complex_DRS_Condition(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj 2011-05-25 16:01:55 UTC (rev 2822) @@ -174,7 +174,7 @@ | drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | - quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr= <WORD> drs2=DRS() + quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() { Complex_DRS_Condition drs; drs = new Complex_DRS_Condition(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2011-05-25 16:01:55 UTC (rev 2822) @@ -395,7 +395,7 @@ jj_consume_token(-1); throw new ParseException(); } - dr = jj_consume_token(WORD); + dr = dr(); drs2 = DRS(); Complex_DRS_Condition drs; drs = new Complex_DRS_Condition(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2011-05-25 16:01:55 UTC (rev 2822) @@ -365,7 +365,7 @@ | drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | - quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr= <WORD> drs2=DRS() + 
quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() { Complex_DRS_Condition drs; drs = new Complex_DRS_Condition(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-25 16:01:55 UTC (rev 2822) @@ -14,7 +14,7 @@ private String[] noun = {"NN","NNS","NNP","NNPS","NPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; - private String[] preps = {"IN"}; + private String[] preps = {"IN","TO"}; public SlotBuilder() { @@ -144,7 +144,7 @@ "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] passEntry2 = {token, "(S DP[wh] (VP DP[dp] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(y,x) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry1); result.add(passEntry2); } @@ -180,14 +180,24 @@ String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + String[] whEntry = {token, + "(S DP[obj] (VP DP[subj] V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ 
l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); + result.add(whEntry); } - else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP") || pos.equals("VB")) { + else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { String[] vEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(vEntry); } + else if (pos.equals("VB")) { + String[] whEntry = {token, + "(S DP[obj] (VP DP[subj] V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(whEntry); + } else if (pos.equals("VBG") || pos.equals("VBN")) { String[] gerEntry = {token, "(NP NP* (VP V:'" + token + "' DP[dp]))", Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-25 08:44:33 UTC (rev 2821) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-25 16:01:55 UTC (rev 2822) @@ -31,6 +31,7 @@ Parser p; boolean ONE_SCOPE_ONLY = true; + boolean UNTAGGED_INPUT = true; public Templator() { @@ -43,6 +44,10 @@ p.USE_DPS_AS_INITTREES = true; p.CONSTRUCT_SEMANTICS = true; } + + public void setUNTAGGED_INPUT(boolean b) { + UNTAGGED_INPUT = b; + } public Set<Template> buildTemplates(String s) { @@ -50,13 +55,19 @@ DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); boolean clearAgain = true; - s = Preprocessor.normalize(s); - String tagged = tagger.tag(s); - System.out.println("Tagged input: " + tagged); - + String tagged; + if (UNTAGGED_INPUT) { + s = Preprocessor.normalize(s); + tagged = 
tagger.tag(s); + System.out.println("Tagged input: " + tagged); + } + else { + tagged = s; + } + String newtagged = Preprocessor.condenseNominals(Preprocessor.findNEs(tagged,s)); - newtagged = Preprocessor.condense(newtagged); - System.out.println("Preprocessed: " + newtagged); + newtagged = Preprocessor.condense(newtagged); + System.out.println("Preprocessed: " + newtagged); p.parse(newtagged,g); Added: trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml 2011-05-25 16:01:55 UTC (rev 2822) @@ -0,0 +1,152 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?><dataset id="dbpedia-test"> +<question id="23"> +<string>Which/WDT presidents/NNS of/IN the/DT United/NNP States/NNPS had/VBD more/JJR than/IN three/CD children/NNS</string> +</question> +<question id="3"> +<string>Give/VB me/PRP the/DT official/JJ websites/NNS of/IN actors/NNS of/IN the/DT television/NN show/NN Charmed/VBN</string> +</question> +<question id="37"> +<string>Who/WP is/VBZ the/DT daughter/NN of/IN Bill/NNP Clinton/NNP married/VBN to/TO</string> +</question> +<question id="43"> +<string>Which/WDT river/NN does/VBZ the/DT Brooklyn/NNP Bridge/NNP cross/VB</string> +</question> +<question id="21"> +<string>How/WRB many/JJ monarchical/JJ countries/NNS are/VBP there/RB in/IN Europe/NNP</string> +</question> +<question id="7"> +<string>Where/WRB did/VBD Abraham/NNP Lincoln/NNP die/VB</string> +</question> +<question id="10"> +<string>Is/VBZ the/DT wife/NN of/IN President/NNP Obama/NNP called/VBD Michelle/NNP</string> +</question> +<question id="28"> +<string>Which/WDT states/NNS of/IN Germany/NNP are/VBP governed/VBN by/IN the/DT Social/NNP Democratic/NNP Party/NNP</string> 
+</question> +<question id="39"> +<string>Which/WDT US/NNP states/NNS possess/VBP gold/NN minerals/NNS</string> +</question> +<question id="50"> +<string>In/IN which/WDT country/NN does/VBZ the/DT Nile/NNP start/VB</string> +</question> +<question id="44"> +<string>Which/WDT locations/NNS have/VBP more/JJR than/IN two/CD caves/NNS</string> +</question> +<question id="30"> +<string>Is/VBZ proinsulin/NNP a/DT protein/NN</string> +</question> +<question id="12"> +<string>Which/WDT classis/NN does/VBZ the/DT Millepede/NNP belong/VBP to/TO</string> +</question> +<question id="49"> +<string>How/WRB tall/JJ is/VBZ Claudia/NNP Schiffer/NNP</string> +</question> +<question id="33"> +<string>Who/WP created/VBD Goofy/NNP</string> +</question> +<question id="4"> +<string>Give/VB me/PRP the/DT capitals/NNS of/IN all/DT US/NNP states/NNS</string> +</question> +<question id="18"> +<string>Give/VB me/PRP all/DT cities/NNS in/IN New/NNP Jersey/NNP with/IN more/JJR than/IN 100000/CD inhabitants/NNS</string> +</question> +<question id="31"> +<string>Which/WDT museum/NN exhibits/VBZ The/DT Scream/NNP by/IN Munch/NNP</string> +</question> +<question id="35"> +<string>Is/NNP Egypts/NNPS largest/JJS city/NN also/RB its/PRP$ capital/NN</string> +</question> +<question id="27"> +<string>What/WP is/VBZ the/DT revenue/NN of/IN IBM/NNP</string> +</question> +<question id="38"> +<string>Which/WDT states/VBZ border/VBZ Utah/NNP</string> +</question> +<question id="13"> +<string>In/IN which/WDT country/NN is/VBZ the/DT Limerick/NNP Lake/NNP</string> +</question> +<question id="32"> +<string>Which/WDT television/NN shows/NNS were/VBD created/VBN by/IN Walt/NNP Disney/NNP</string> +</question> +<question id="45"> +<string>Which/WDT mountain/NN is/VBZ the/DT highest/JJS after/IN the/DT Annapurna/NNP</string> +</question> +<question id="29"> +<string>In/IN which/WDT films/NNS directed/VBN by/IN Garry/NNP Marshall/NNP was/VBD Julia/NNP Roberts/NNP starring/VBG</string> +</question> +<question 
id="42"> +<string>Which/WDT bridges/NNS are/VBP of/IN the/DT same/JJ type/NN as/IN the/DT Manhattan/NNP Bridge/NNP</string> +</question> +<question id="14"> +<string>Was/VBD US/NNP president/NNP Jackson/NNP involved/VBD in/IN a/DT war/NN</string> +</question> +<question id="20"> +<string>Which/WDT European/JJ countries/NNS are/VBP a/DT constitutional/JJ monarchy/NN</string> +</question> +<question id="40"> +<string>Who/WP is/VBZ the/DT author/NN of/IN WikiLeaks/NNS</string> +</question> +<question id="17"> +<string>Which/WDT state/NN of/IN the/DT United/NNP States/NNPS of/IN America/NNP has/VBZ the/DT highest/JJS density/NN</string> +</question> +<question id="19"> +<string>What/WP is/VBZ the/DT currency/NN of/IN the/DT Czech/JJ Republic/NNP</string> +</question> +<question id="22"> +<string>Which/WDT countries/NNS in/IN the/DT European/NNP Union/NNP adopted/VBD the/DT Euro/NNP</string> +</question> +<question id="11"> +<string>What/WP is/VBZ the/DT area/NN code/NN of/IN Berlin/NNP</string> +</question> +<question id="9"> +<string>Which/WDT countries/NNS have/VBP more/JJR than/IN two/CD official/JJ languages/NNS</string> +</question> +<question id="16"> +<string>Who/WP is/VBZ the/DT owner/NN of/IN Universal/NNP Studios/NNPS</string> +</question> +<question id="34"> +<string>Through/IN which/WDT countries/NNS does/VBZ the/DT Yenisei/NNP river/NN flow/VB</string> +</question> +<question id="48"> +<string>When/WRB did/VBD Germany/NNP join/VB the/DT EU/NNP</string> +</question> +<question id="36"> +<string>Which/WDT monarchs/NNS of/IN the/DT United/NNP Kingdom/NNP were/VBD married/VBN to/TO a/DT German/JJ</string> +</question> +<question id="8"> +<string>When/WRB was/VBD the/DT Battle/NNP of/IN Gettysburg/NNP</string> +</question> +<question id="24"> +<string>What/WP is/VBZ the/DT highest/JJS mountain/NN in/IN Germany/NNP</string> +</question> +<question id="26"> +<string>Give/VB me/PRP all/DT soccer/NN clubs/NNS in/IN Spain/NNP</string> +</question> +<question id="5"> 
+<string>What/WP are/VBP the/DT official/JJ languages/NNS of/IN the/DT Philippines/NNPS</string> +</question> +<question id="6"> +<string>Who/WP is/VBZ the/DT mayor/NN of/IN New/NNP York/NNP City/NNP</string> +</question> +<question id="41"> +<string>Who/WP designed/VBD the/DT Brooklyn/NNP Bridge/NNP</string> +</question> +<question id="2"> +<string>Which/WDT telecommunications/NN organizations/NNS are/VBP located/VBN in/IN Belgium/NNP</string> +</question> +<question id="15"> +<string>What/WP is/VBZ the/DT profession/NN of/IN Frank/NNP Herbert/NNP</string> +</question> +<question id="46"> +<string>What/WP is/VBZ the/DT highest/JJS place/NN of/IN Karakoram/NNP</string> +</question> +<question id="25"> +<string>Give/VB me/PRP the/DT homepage/NN of/IN Forbes/NNP</string> +</question> +<question id="1"> +<string>Which/WDT companies/NNS are/VBP in/IN the/DT computer/NN software/NN industry/NN</string> +</question> +<question id="47"> +<string>What/WP did/VBD Bruce/NNP Carver/NNP die/VB from/IN</string> +</question> +</dataset> \ No newline at end of file Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml 2011-05-25 16:01:55 UTC (rev 2822) @@ -0,0 +1,26538 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?><dataset id="dbpedia-train"> +<question id="23"> +<string>Give/VB me/PRP all/DT school/NN types/NNS</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX yago: 
<http://dbpedia.org/class/yago/> +SELECT DISTINCT ?uri ?string +WHERE +{ + ?uri rdf:type yago:SchoolTypes . + OPTIONAL {?uri rdfs:label ?string . FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Byzantine_university +</uri> +<string> +Byzantine university +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Abendgymnasium +</uri> +<string> +Abendgymnasium +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Adult_high_school +</uri> +<string> +Adult high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Alternative_school +</uri> +<string> +Alternative school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Area_school +</uri> +<string> +Area school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Art_school +</uri> +<string> +Art school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Beacon_School +</uri> +<string> +Beacon School +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Berufsoberschule +</uri> +<string> +Berufsoberschule +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Birmingham_board_schools +</uri> +<string> +Birmingham board schools +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Boarding_school +</uri> +<string> +Boarding school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Campus_university +</uri> +<string> +Campus university +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Cathedral_school +</uri> +<string> +Cathedral school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Catholic_school +</uri> +<string> +Catholic school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Central_school +</uri> +<string> +Central school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Charity_school +</uri> +<string> +Charity school +</string> 
+</answer> +<answer> +<uri> +http://dbpedia.org/resource/Charter_school +</uri> +<string> +Charter school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Christian_school +</uri> +<string> +Christian school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/City_Technology_College +</uri> +<string> +City Technology College +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Co-institutional +</uri> +<string> +Co-institutional +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Collegiate_institute +</uri> +<string> +Collegiate institute +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Collegiate_university +</uri> +<string> +Collegiate university +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Common_school +</uri> +<string> +Common school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Comprehensive_high_school +</uri> +<string> +Comprehensive high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Comprehensive_school +</uri> +<string> +Comprehensive school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Combined_school +</uri> +<string> +Combined school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Community_school +</uri> +<string> +Community school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Continuation_high_school +</uri> +<string> +Continuation high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Cram_schools_in_Hong_Kong +</uri> +<string> +Cram schools in Hong Kong +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Cram_school +</uri> +<string> +Cram school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Dance_studio +</uri> +<string> +Dance studio +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Dalton_Plan +</uri> +<string> +Dalton Plan +</string> +</answer> 
+<answer> +<uri> +http://dbpedia.org/resource/Dame_school +</uri> +<string> +Dame school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Darul_uloom +</uri> +<string> +Darul uloom +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Day_school +</uri> +<string> +Day school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Elementary_Middle_School +</uri> +<string> +Elementary Middle School +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Elementary_school +</uri> +<string> +Elementary school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Faith_school +</uri> +<string> +Faith school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Federated_school +</uri> +<string> +Federated school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Film_school +</uri> +<string> +Film school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/First_school +</uri> +<string> +First school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Finishing_school +</uri> +<string> +Finishing school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Folk_high_school +</uri> +<string> +Folk high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Foundation_school +</uri> +<string> +Foundation school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Gaelscoil +</uri> +<string> +Gaelscoil +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Graduate_school +</uri> +<string> +Graduate school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Grammar_school +</uri> +<string> +Grammar school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Grant-maintained_school +</uri> +<string> +Grant-maintained school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Gurukul +</uri> +<string> +Gurukul +</string> +</answer> 
+<answer> +<uri> +http://dbpedia.org/resource/Halau +</uri> +<string> +Halau +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Han_school +</uri> +<string> +Han school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Hedge_school +</uri> +<string> +Hedge school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/High_school +</uri> +<string> +High school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Independent_school +</uri> +<string> +Independent school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Infant_school +</uri> +<string> +Infant school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Institute_of_technology +</uri> +<string> +Institute of technology +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/International_school +</uri> +<string> +International school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Jewish_day_school +</uri> +<string> +Jewish day school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Juku +</uri> +<string> +Juku +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Junior_school +</uri> +<string> +Junior school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Kendriya_Vidyalaya +</uri> +<string> +Kendriya Vidyalaya +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Laboratory_school +</uri> +<string> +Laboratory school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Latin_school +</uri> +<string> +Latin school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Law_school_in_the_United_States +</uri> +<string> +Law school in the United States +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Liceo_classico +</uri> +<string> +Liceo classico +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Liceo_scientifico +</uri> +<string> +Liceo 
scientifico +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Liceum_og%C3%B3lnokszta%C5%82c%C4%85ce +</uri> +<string> +Liceum ogólnokształcące +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Lower_School +</uri> +<string> +Lower School +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Lutheran_school +</uri> +<string> +Lutheran school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Magnet_school +</uri> +<string> +Magnet school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Maktab +</uri> +<string> +Maktab +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Medical_school +</uri> +<string> +Medical school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Menntask%C3%B3li +</uri> +<string> +Menntaskóli +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Medieval_university +</uri> +<string> +Medieval university +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Middle_school +</uri> +<string> +Middle school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Military_academy +</uri> +<string> +Military academy +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Minor_seminary +</uri> +<string> +Minor seminary +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Monitorial_schools +</uri> +<string> +Monitorial schools +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Mixed-sex_education +</uri> +<string> +Mixed-sex education +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Multidenominational_school +</uri> +<string> +Multidenominational school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Normal_school +</uri> +<string> +Normal school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Nursery_school +</uri> +<string> +Nursery school +</string> +</answer> +<answer> +<uri> 
+http://dbpedia.org/resource/Nursing_school +</uri> +<string> +Nursing school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/One-room_school +</uri> +<string> +One-room school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Parochial_school +</uri> +<string> +Parochial school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pit_school +</uri> +<string> +Pit school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pre-kindergarten +</uri> +<string> +Pre-kindergarten +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pre-school_playgroup +</uri> +<string> +Pre-school playgroup +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pregnancy_school +</uri> +<string> +Pregnancy school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Primary_education +</uri> +<string> +Primary education +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Primary_school +</uri> +<string> +Primary school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Private_school +</uri> +<string> +Private school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Public_high_school +</uri> +<string> +Public high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Realschule +</uri> +<string> +Realschule +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Reform_school +</uri> +<string> +Reform school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Reggio_Emilia_approach +</uri> +<string> +Reggio Emilia approach +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Roma_Special_School +</uri> +<string> +Roma Special School +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Sail_training +</uri> +<string> +Sail training +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/School_of_the_Air +</uri> +<string> +School of the 
Air +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/School_of_Infantry +</uri> +<string> +School of Infantry +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Secondary_education +</uri> +<string> +Secondary education +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Secondary_modern_school +</uri> +<string> +Secondary modern school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Selective_school +</uri> +<string> +Selective school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Separate_school +</uri> +<string> +Separate school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Secondary_school +</uri> +<string> +Secondary school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Sixth_form_college +</uri> +<string> +Sixth form college +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Ski_school +</uri> +<string> +Ski school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Specialist_school +</uri> +<string> +Specialist school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Special_school +</uri> +<string> +Special school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Specialized_school +</uri> +<string> +Specialized school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/State_school +</uri> +<string> +State school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Sudbury_school +</uri> +<string> +Sudbury school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Summer_school +</uri> +<string> +Summer school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Technikon +</uri> +<string> +Technikon +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Taxonomy_of_schools +</uri> +<string> +Taxonomy of schools +</string> +</answer> +<answer> +<uri> 
+http://dbpedia.org/resource/Technicum +</uri> +<string> +Technicum +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Training_ship +</uri> +<string> +Training ship +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Upper_school +</uri> +<string> +Upper school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/University-preparatory_school +</uri> +<string> +University-preparatory school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Utraquist_school +</uri> +<string> +Utraquist school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Veterinary_school +</uri> +<string> +Veterinary school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Vocational_technical_school +</uri> +<string> +Vocational technical school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Voluntary_controlled_school +</uri> +<string> +Voluntary controlled school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Voluntary_aided_school +</uri> +<string> +Voluntary aided school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Voluntary_secondary_school +</uri> +<string> +Voluntary secondary school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Vocational_school +</uri> +<string> +Vocational school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Waldorf_education +</uri> +<string> +Waldorf education +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Year-round_school +</uri> +<string> +Year-round school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/%C4%B0mam_Hatip_school +</uri> +<string> +İmam Hatip school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/International_Preschool +</uri> +<string> +International Preschool +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Integral_education +</uri> +<string> +Integral 
education +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Forest_kindergarten +</uri> +<string> +Forest kindergarten +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/K-8_school +</uri> +<string> +K-8 school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Cursinho +</uri> +<string> +Cursinho +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Hospital_school +</uri> +<string> +Hospital school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Akademie +</uri> +<string> +Akademie +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Monastic_school +</uri> +<string> +Monastic school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Abendhauptschule +</uri> +<string> +Abendhauptschule +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Orphan_school +</uri> +<string> +Orphan school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Forest_schools +</uri> +<string> +Forest schools +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Canadian_Indian_residential_school_system +</uri> +<string> +Canadian Indian residential school system +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Ungraded_school +</uri> +<string> +Ungraded school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Kolleg +</uri> +<string> +Kolleg +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Sports_school +</uri> +<string> +Sports school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Church_school +</uri> +<string> +Church school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/New_tech_high_school +</uri> +<string> +New tech high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Semester_school +</uri> +<string> +Semester school +</string> +</answer> +<answer> +<uri> 
+http://dbpedia.org/resource/Maryshore_Seminary +</uri> +<string> +Maryshore Seminary +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Village_college +</uri> +<string> +Village college +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Early_college_high_school +</uri> +<string> +Early college high school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pirivena +</uri> +<string> +Pirivena +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Therapeutic_boarding_school +</uri> +<string> +Therapeutic boarding school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Cadet_Corps +</uri> +<string> +Cadet Corps +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Direct_grant_grammar_school +</uri> +<string> +Direct grant grammar school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Approved_school +</uri> +<string> +Approved school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Dental_school +</uri> +<string> +Dental school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Escalator_school +</uri> +<string> +Escalator school +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Abendrealschule +</uri> +<string> +Abendrealschule +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Independent_school_(United_Kingdom) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Gymnasium_(school) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Mechanics'_Institutes +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/National_school_(Ireland) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Special_school_(Netherlands) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Academy_(English_school) +</uri> 
+<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Wirtschaftsschule_(Bavaria) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Preparatory_school_(UK) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Partially_selective_school_(England) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Law_school_in_South_Korea +</uri> +<string> +Law school in South Korea +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Language%2Fculture_based_charter_school +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/S%C3%A1mi_school_(Sweden) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Independent_school_(Australia) +</uri> +<string> + +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Free_school_(England) +</uri> +<string> + +</string> +</answer> +</answers> +</question> +<question id="11"> +<string>Which/WDT presidents/NNS were/VBD born/VBN in/IN 1945/CD</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX onto: <http://dbpedia.org/ontology/> +PREFIX yago: <http://dbpedia.org/class/yago/> +SELECT DISTINCT ?uri ?string +WHERE +{ + { + ?uri rdf:type onto:President . + ?uri onto:birthDate ?date . + FILTER regex(?date,'^1945') . + OPTIONAL {?uri rdfs:label ?string . FILTER (lang(?string) = 'en') } + } + UNION + { + ?uri rdf:type yago:President. + ?uri onto:birthDate ?date . + FILTER regex(?date, '^1945') . + OPTIONAL {?uri rdfs:label ?string . 
FILTER (lang(?string) = 'en') } + } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Akbar_Tanjung +</uri> +<string> +Akbar Tanjung +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Charles_Margai +</uri> +<string> +Charles Margai +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Daniel_Ortega +</uri> +<string> +Daniel Ortega +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Igor_Ivanov +</uri> +<string> +Igor Ivanov +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Joe_Ghiz +</uri> +<string> +Joe Ghiz +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Joe_Robert_Pemagbi +</uri> +<string> +Joe Robert Pemagbi +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/John_Fahey_%28politician%29 +</uri> +<string> +John Fahey (politician) +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/John_Olsen +</uri> +<string> +John Olsen +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Jorge_Serrano_El%C3%ADas +</uri> +<string> +Jorge Serrano Elías +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Laurent_Gbagbo +</uri> +<string> +Laurent Gbagbo +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Lee_Jong-wook +</uri> +<string> +Lee Jong-wook +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Levon_Ter-Petrossian +</uri> +<string> +Levon Ter-Petrossian +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Luc_Van_den_Brande +</uri> +<string> +Luc Van den Brande +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Luiz_In%C3%A1cio_Lula_da_Silva +</uri> +<string> +Luiz Inácio Lula da Silva +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Mom%C4%8Dilo_Kraji%C5%A1nik +</uri> +<string> +Momčilo Krajišnik +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Moshe_Katsav +</uri> +<string> +Moshe Katsav +</string> 
+</answer> +<answer> +<uri> +http://dbpedia.org/resource/Radovan_Karad%C5%BEi%C4%87 +</uri> +<string> +Radovan Karadžić +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Vladislav_Ardzinba +</uri> +<string> +Vladislav Ardzinba +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Zlatko_Tom%C4%8Di%C4%87 +</uri> +<string> +Zlatko Tomčić +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Yuriy_Meshkov +</uri> +<string> +Yuriy Meshkov +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Abdirahman_Mohamud_Farole +</uri> +<string> +Abdirahman Mohamud Farole +</string> +</answer> +</answers> +</question> +<question id="1"> +<string>Who/WP are/VBP the/DT presidents/NNS of/IN the/DT United/NNP States/NNPS</string> +<query> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX yago: <http://dbpedia.org/class/yago/> +PREFIX onto: <http://dbpedia.org/ontology/> +PREFIX res: <http://dbpedia.org/resource/> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX prop: <http://dbpedia.org/property/> +SELECT DISTINCT ?uri ?string +WHERE +{ + { + ?uri rdf:type yago:PresidentsOfTheUnitedStates. + } + UNION + { + ?uri rdf:type onto:President. + ?uri prop:title res:President_of_the_United_States. + } + OPTIONAL {?uri rdfs:label ?string. 
FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Abraham_Lincoln +</uri> +<string> +Abraham Lincoln +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Andrew_Jackson +</uri> +<string> +Andrew Jackson +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Andrew_Johnson +</uri> +<string> +Andrew Johnson +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Barack_Obama +</uri> +<string> +Barack Obama +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Benjamin_Harrison +</uri> +<string> +Benjamin Harrison +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Bill_Clinton +</uri> +<string> +Bill Clinton +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Calvin_Coolidge +</uri> +<string> +Calvin Coolidge +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Chester_A._Arthur +</uri> +<string> +Chester A. Arthur +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Dwight_D._Eisenhower +</uri> +<string> +Dwight D. Eisenhower +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Franklin_D._Roosevelt +</uri> +<string> +Franklin D. Roosevelt +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Franklin_Pierce +</uri> +<string> +Franklin Pierce +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/George_W._Bush +</uri> +<string> +George W. Bush +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/George_Washington +</uri> +<string> +George Washington +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Gerald_Ford +</uri> +<string> +Gerald Ford +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Grover_Cleveland +</uri> +<string> +Grover Cleveland +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Harry_S._Truman +</uri> +<string> +Harry S. 
Truman +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Herbert_Hoover +</uri> +<string> +Herbert Hoover +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/James_A._Garfield +</uri> +<string> +James A. Garfield +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/James_Buchanan +</uri> +<string> +James Buchanan +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/James_K._Polk +</uri> +<string> +James K. Polk +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/James_Madison +</uri> +<string> +James Madison +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/James_Monroe +</uri> +<string> +James Monroe +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Jimmy_Carter +</uri> +<string> +Jimmy Carter +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/John_Adams +</uri> +<string> +John Adams +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/John_Quincy_Adams +</uri> +<string> +John Quincy Adams +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/John_Tyler +</uri> +<string> +John Tyler +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Lyndon_B._Johnson +</uri> +<string> +Lyndon B. 
Johnson +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Martin_Van_Buren +</uri> +<string> +Martin Van Buren +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Millard_Fillmore +</uri> +<string> +Millard Fillmore +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/President_of_the_United_States +</uri> +<string> +President of the United States +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Richard_Nixon +</uri> +<string> +Richard Nixon +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Ronald_Reagan +</uri> +<string> +Ronald Reagan +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Rutherford_B._Hayes +</uri> +<string> +Rutherford B. Hayes +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Theodore_Roosevelt +</uri> +<string> +Theodore Roosevelt +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Thomas_Jefferson +</uri> +<string> +Thomas Jefferson +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Timeline_of_Presidents_of_the_United_States +</uri> +<string> +Timeline of Presidents of the United States +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Ulysses_S._Grant +</uri> +<string> +Ulysses S. Grant +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Warren_G._Harding +</uri> +<string> +Warren G. 
Harding +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/William_Henry_Harrison +</uri> +<string> +William Henry Harrison +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/William_Howard_Taft +</uri> +<string> +William Howard Taft +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/William_McKinley +</uri> +<string> +William McKinley +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Woodrow_Wilson +</uri> +<string> +Woodrow Wilson +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Zachary_Taylor +</uri> +<string> +Zachary Taylor +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/George_H._W._Bush +</uri> +<string> +George H. W. Bush +</string> +</answer> +</answers> +</question> +<question id="10"> +<string>Who/WP was/VBD the/DT wife/NN of/IN President/NNP Lincoln/NNP</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> +PREFIX onto: <http://dbpedia.org/ontology/> +SELECT ?uri ?string +WHERE +{ + ?person rdf:type onto:President . + ?person foaf:surname 'Lincoln'@en . + ?person onto:spouse ?uri. + OPTIONAL {?uri rdfs:label ?string . FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Mary_Todd_Lincoln +</uri> +<string> +Mary Todd Lincoln +</string> +</answer> +</answers> +</question> +<question id="9"> +<string>Who/WP developed/VBD the/DT video/NN game/NN World/NN of/IN Warcraft/NNP</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX onto: <http://dbpedia.org/ontology/> +SELECT ?uri ?string +WHERE +{ + ?subject rdf:type onto:Software . + ?subject rdfs:label 'World of Warcraft'@en . + ?subject onto:developer ?uri . + OPTIONAL {?uri rdfs:label ?string . 
FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Blizzard_Entertainment +</uri> +<string> +Blizzard Entertainment +</string> +</answer> +</answers> +</question> +<question id="29"> +<string>What/WP is/VBZ the/DT official/JJ website/NN of/IN Tom/NNP Hanks/NNP</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> +SELECT ?uri +WHERE +{ + ?subject rdfs:label 'Tom Hanks'@en . + ?subject foaf:homepage ?uri +} +</query> +<answers> +<answer> +<uri> +http://www.youtube.com/tomhankschannel +</uri> +</answer> +</answers> +</question> +<question id="37"> +<string>List/VB all/DT episodes/NNS of/IN the/DT first/JJ season/NN of/IN the/DT HBO/NNP television/NN series/NN The/DT Sopranos/NNPS</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX onto: <http://dbpedia.org/ontology/> +PREFIX res: <http://dbpedia.org/resource/> +SELECT ?uri ?string +WHERE +{ + ?uri onto:series res:The_Sopranos . + ?uri onto:seasonNumber 1 . + OPTIONAL {?uri rdfs:label ?string . 
FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/46_Long +</uri> +<string> +46 Long +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/A_Hit_Is_a_Hit +</uri> +<string> +A Hit Is a Hit +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Down_Neck +</uri> +<string> +Down Neck +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/I_Dream_of_Jeannie_Cusamano +</uri> +<string> +I Dream of Jeannie Cusamano +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Nobody_Knows_Anything +</uri> +<string> +Nobody Knows Anything +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Pax_Soprana +</uri> +<string> +Pax Soprana +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/The_Legend_of_Tennessee_Moltisanti +</uri> +<string> +The Legend of Tennessee Moltisanti +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Isabella_%28The_Sopranos%29 +</uri> +<string> +Isabella (The Sopranos) +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Denial,_Anger,_Acceptance +</uri> +<string> +Denial, Anger, Acceptance +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Meadowlands_%28The_Sopranos%29 +</uri> +<string> +Meadowlands (The Sopranos) +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/The_Sopranos_%28episode%29 +</uri> +<string> +The Sopranos (episode) +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Boca_%28The_Sopranos%29 +</uri> +<string> +Boca (The Sopranos) +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/College_%28The_Sopranos%29 +</uri> +<string> +College (The Sopranos) +</string> +</answer> +</answers> +</question> +<question id="31"> +<string>Who/WP produced/VBD the/DT most/JJS films/NNS</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX onto: 
<http://dbpedia.org/ontology/> +SELECT ?uri ?string +WHERE +{ + ?film rdf:type onto:Film . + ?film onto:producer ?uri . + OPTIONAL {?uri rdfs:label ?string . FILTER (lang(?string) = 'en') } +} ORDER BY DESC(COUNT(?film)) LIMIT 1 +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Hal_Roach +</uri> +<string> +Hal Roach +</string> +</answer> +</answers> +</question> +<question id="8"> +<string>Which/WDT people/NNS have/VBP as/IN their/PRP$ given/VBN name/NN Jimmy/NNP</string> +<query> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> +SELECT DISTINCT ?uri ?string +WHERE +{ + ?uri rdf:type foaf:Person. + ?uri foaf:givenName 'Jimmy'@en . + OPTIONAL {?uri rdfs:label ?string . FILTER (lang(?string) = 'en') } +} +</query> +<answers> +<answer> +<uri> +http://dbpedia.org/resource/Jimmy_Fricke +</uri> +<string> +Jimmy Fricke +</string> +</answer> +<answer> +<uri> +http://dbpedia.org/resource/Jimmy_Casella +</uri> +<string>... [truncated message content] |
From: <lor...@us...> - 2011-05-31 09:53:17
|
Revision: 2842 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2842&view=rev Author: lorenz_b Date: 2011-05-31 09:53:10 +0000 (Tue, 31 May 2011) Log Message: ----------- Added additional query generator which is ranks the URI candidates by string similarity. Made LingPipeNER object creation in Preprocessor class static, because otherwise the dictionary for LingPipe is loaded every time. Extended Eval script. Reverted to Solr 1.4.1 until new index for Solr 3.1 is created . Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/MultithreadedSPARQLQueryExecutor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Similarity.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/pom.xml 2011-05-31 09:53:10 UTC (rev 2842) @@ -46,7 +46,7 @@ <dependency> <groupId>org.apache.solr</groupId> <artifactId>solr-core</artifactId> - <version>3.1.0</version> + <version>1.4.1</version> <type>jar</type> <scope>compile</scope> </dependency> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -3,7 +3,9 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -23,7 +25,7 @@ import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.core.ActiveLearningAlgorithm; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -39,8 +41,12 @@ import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; -public class SPARQLTemplateBasedLearner implements ActiveLearningAlgorithm, SparqlQueryLearningAlgorithm{ +public class SPARQLTemplateBasedLearner implements SparqlQueryLearningAlgorithm{ + enum Ranking{ + LUCENE, SIMILARITY, NONE + } + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); @@ -48,8 +54,9 @@ private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-1.4.1"; private static final int RECURSION_DEPTH = 2; - private boolean USE_LUCENE_RANKING = true; + private Ranking ranking = Ranking.SIMILARITY; private boolean useRemoteEndpointValidation = true; + private boolean stopIfQueryResultNotEmpty = true; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -99,6 +106,10 @@ 
this.useRemoteEndpointValidation = useRemoteEndpointValidation; } + public void setRanking(Ranking ranking) { + this.ranking = ranking; + } + public void learnSPARQLQueries() throws NoTemplateFoundException{ learnedSPARQLQueries = new HashMap<String, List<String>>(); //generate SPARQL query templates @@ -116,12 +127,7 @@ } //generate SPARQL query candidates - Set<? extends Query> sparqlQueryCandidates; - if(USE_LUCENE_RANKING){ - sparqlQueryCandidates = getRatedSPARQLQueryCandidates(templates); - } else { - sparqlQueryCandidates = getSPARQLQueryCandidates(templates); - } + Collection<? extends Query> sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); //test candidates if(useRemoteEndpointValidation){ //on remote endpoint @@ -147,33 +153,14 @@ return workingModel; } -// private List<String> getSPARQLQueryCandidates(Set<Template> templates){ -// logger.info("Generating candidate SPARQL queries..."); -// mon.start(); -// List<String> queries = new ArrayList<String>(); -// Query query; -// for(Template template : templates){ -// query = template.getQuery(); -// queries.add(query.toString()); -// for(Slot slot : template.getSlots()){ -// Set<String> tmp = new HashSet<String>(); -// String var = slot.getAnchor(); -// List<String> words = slot.getWords(); -// for(String uri : getCandidateURIs(slot)){ -// for(String q : queries){ -// tmp.add(q.replace("?" + var, "<" + uri + ">")); -// } -// } -// if(!words.isEmpty()){ -// queries.clear(); -// queries.addAll(tmp); -// } -// } -// } -// mon.stop(); -// logger.info("Done in " + mon.getLastValue() + "ms."); -// return queries; -// } + private Collection<? 
extends Query> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ + switch(ranking){ + case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); + case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); + case NONE: return getSPARQLQueryCandidates(templates); + default: return null; + } + } private Set<Query> getSPARQLQueryCandidates(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); @@ -233,7 +220,7 @@ return query2Score; } - private Set<RatedQuery> getRatedSPARQLQueryCandidates(Set<Template> templates){ + private Set<RatedQuery> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); @@ -265,6 +252,35 @@ return ratedQueries; } + private List<Query> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ + logger.info("Generating candidate SPARQL queries..."); + mon.start(); + List<Query> queries = new ArrayList<Query>(); + + for(Template template : templates){ + queries.add(template.getQuery()); + for(Slot slot : template.getSlots()){ + List<Query> tmp = new ArrayList<Query>(); + String var = slot.getAnchor(); + List<String> words = slot.getWords(); + for(String uri : getCandidateURIsSortedBySimilarity(slot)){ + for(Query query : queries){ + Query newQuery = new Query(query); + newQuery.replaceVarWithURI(var, uri); + tmp.add(newQuery); + } + } + if(!words.isEmpty()){ + queries.clear(); + queries.addAll(tmp); + } + } + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + return queries; + } + private Set<String> getCandidateURIs(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); @@ -287,6 +303,59 @@ return uris; } + private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ + List<String> sortedURIs = new ArrayList<String>(); + + SolrSearch index = 
getIndexBySlotType(slot); + + SortedSet<String> tmp; + List<String> uris; + for(String word : slot.getWords()){ + tmp = new TreeSet<String>(new StringSimilarityComparator(word)); + uris = index.getResources("label:\"" + word + "\""); + tmp.addAll(uris); + sortedURIs.addAll(tmp); + tmp.clear(); + } + + return sortedURIs; + } + + class StringSimilarityComparator implements Comparator<String>{ + private String s; + + public StringSimilarityComparator(String s) { + this.s = s; + } + + @Override + public int compare(String s1, String s2) { + double sim1 = Similarity.getSimilarity(s, s1); + double sim2 = Similarity.getSimilarity(s, s2); + + if(sim1 < sim2){ + return 1; + } else if(sim1 > sim2){ + return -1; + } else { + return s1.compareTo(s2); + } + } + + } + + private SolrSearch getIndexBySlotType(Slot slot){ + SolrSearch index = null; + if(slot.getSlotType() == SlotType.CLASS){ + index = class_index; + } else if(slot.getSlotType() == SlotType.PROPERTY){ + index = property_index; + } else if(slot.getSlotType() == SlotType.RESOURCE){ + index = resource_index; + } + return index; + } + private Map<String, Float> getCandidateURIsWithScore(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); @@ -310,7 +379,7 @@ return uri2Score; } - private void validateAgainstRemoteEndpoint(Set<? extends Query> queries){ + private void validateAgainstRemoteEndpoint(Collection<? extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); for(Query query : queries){ queryStrings.add(query.toString()); @@ -326,6 +395,9 @@ List<String> results = getResultFromRemoteEndpoint(query); if(!results.isEmpty()){ learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } } logger.info("Result: " + results); } @@ -333,7 +405,7 @@ logger.info("Done in " + mon.getLastValue() + "ms."); } - private void validateAgainstLocalModel(Set<? extends Query> queries){ + private void validateAgainstLocalModel(Collection<? 
extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); for(Query query : queries){ queryStrings.add(query.toString()); @@ -387,13 +459,15 @@ * @throws NoTemplateFoundException */ public static void main(String[] args) throws MalformedURLException, NoTemplateFoundException { - String question = "Give me all countries in Europe";//Give me all soccer clubs in Premier League"; + String question = "Give me all soccer clubs in Premier League"; +// String question = "Give me all films starring Brad Pitt"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), Collections.<String>singletonList("http://dbpedia.org"), Collections.<String>emptyList()); learner.setEndpoint(endpoint); learner.setQuestion(question); learner.learnSPARQLQueries(); + System.out.println(learner.getBestSPARQLQuery()); } @@ -407,13 +481,8 @@ } @Override - public void setOracle(Oracle oracle) { - this.oracle = oracle; - } - - @Override public String getBestSPARQLQuery() { - return null; + return learnedSPARQLQueries.keySet().iterator().next(); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -15,6 +15,8 @@ static final String[] genericReplacements = { "\"", "", "'", "", "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; + static NER ner = new LingPipeNER(); + public Preprocessor() { } @@ -186,7 +188,7 @@ "VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN", "GERUNDIN","VPREP","WHEN","WHERE","IN","TO","DT"}; - NER 
ner = new LingPipeNER(); +// NER ner = new LingPipeNER(); List<String> namedentities = ner.getNamedEntitites(untagged); List<String> usefulnamedentities = new ArrayList<String>(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -5,6 +5,8 @@ import java.util.ArrayList; import java.util.List; +import org.apache.log4j.Logger; + import com.aliasi.chunk.Chunk; import com.aliasi.chunk.Chunker; import com.aliasi.chunk.Chunking; @@ -15,12 +17,14 @@ public class LingPipeNER implements NER{ - private static final String DICTIONARY_PATH = "src/main/resources/tbsl/models/dbpedia_lingpipe.dictionary"; + private static Logger logger = Logger.getLogger(LingPipeNER.class); + private static final String DICTIONARY_PATH = "tbsl/models/dbpedia_lingpipe.dictionary"; + private Chunker ner; public LingPipeNER() { - this(true, true); + this(true, false); } public LingPipeNER(boolean caseSensitive) { @@ -29,8 +33,12 @@ public LingPipeNER(boolean caseSensitive, boolean allMatches) { try { - Dictionary<String> dictionary = (Dictionary<String>) AbstractExternalizable.readObject(new File(DICTIONARY_PATH)); + long startTime = System.currentTimeMillis(); + logger.info("Initializing LingPipe NER..."); + String path = this.getClass().getClassLoader().getResource(DICTIONARY_PATH).getPath(); + Dictionary<String> dictionary = (Dictionary<String>) AbstractExternalizable.readObject(new File(path)); ner = new ExactDictionaryChunker(dictionary, IndoEuropeanTokenizerFactory.INSTANCE, allMatches, caseSensitive); + logger.info("Done in " + (System.currentTimeMillis()-startTime) + "ms."); } catch (IOException e) { e.printStackTrace(); } catch 
(ClassNotFoundException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -11,13 +11,14 @@ import edu.stanford.nlp.tagger.maxent.MaxentTagger; public class StanfordPartOfSpeechTagger implements PartOfSpeechTagger{ + + private static final String MODEL = "tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; private MaxentTagger tagger; public StanfordPartOfSpeechTagger(){ try { -// String modelPath = ClassLoader.getSystemResource("tbsl/models/bidirectional-distsim-wsj-0-18.tagger").toString(); - String modelPath = "src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; + String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); tagger = new MaxentTagger(modelPath); } catch (IOException e) { e.printStackTrace(); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/MultithreadedSPARQLQueryExecutor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/MultithreadedSPARQLQueryExecutor.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/MultithreadedSPARQLQueryExecutor.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -0,0 +1,88 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.dllearner.kb.sparql.SparqlEndpoint; + 
+import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; + +public class MultithreadedSPARQLQueryExecutor { + + private int threadCount; + private SparqlEndpoint endpoint; + + private ExecutorService es; + + public MultithreadedSPARQLQueryExecutor(SparqlEndpoint endpoint) { + this(endpoint, Runtime.getRuntime().availableProcessors()); + } + + public MultithreadedSPARQLQueryExecutor(SparqlEndpoint endpoint, int threadCount) { + this.endpoint = endpoint; + this.threadCount = threadCount; + + es = Executors.newFixedThreadPool(threadCount); + } + + public List<ResultSet> executeQueries(List<String> queries){ + List<ResultSet> result = new ArrayList<ResultSet>(); + + Future<ResultSet>[] ret = new Future[queries.size()]; + + for(int i = 0; i < queries.size(); i++){ + ret[i] = es.submit(new SPARQLQueryExecutionTask(queries.get(i))); + } + + for (int i = 0; i < queries.size(); i++) { + try { + result.add(ret[i].get()); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + return result; + } + + public void close(){ + es.shutdown(); + } + + private class SPARQLQueryExecutionTask implements Callable<ResultSet>{ + + private String query; + + public SPARQLQueryExecutionTask(String query){ + this.query = query; + } + + @Override + public ResultSet call() throws Exception { + QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for (String dgu : endpoint.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : endpoint.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + + ResultSet rs = null; + if(query.contains("SELECT")){ + rs = queryExecution.execSelect(); + } else if(query.contains("ASK")){ + queryExecution.execAsk(); + } + return rs; + } + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/MultithreadedSPARQLQueryExecutor.java 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Similarity.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Similarity.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Similarity.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -0,0 +1,23 @@ +package org.dllearner.algorithm.tbsl.util; + +import org.dllearner.algorithm.qtl.filters.I_Sub; + +import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; +import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein; +import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; + +public class Similarity { + + private static AbstractStringMetric qGramMetric = new QGramsDistance(); + private static AbstractStringMetric levensteinMetric = new Levenshtein(); + private static I_Sub substringMetric = new I_Sub(); + + public static double getSimilarity(String s1, String s2){ + float qGramSim = qGramMetric.getSimilarity(s1, s2); + float levensteinSim = levensteinMetric.getSimilarity(s1, s2); + double subStringSim = substringMetric.score(s1, s2, true); + + return (qGramSim + levensteinSim + subStringSim) / 3; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Similarity.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-05-30 09:14:36 UTC (rev 2841) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-05-31 09:53:10 UTC (rev 2842) @@ -31,15 +31,17 @@ import 
com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.RDFNode; -public class Evaluation implements Oracle{ +public class Evaluation{ private static Logger logger = Logger.getLogger(Evaluation.class); private File evaluationFile; - private Map<String, String> question2query = new Hashtable<String, String>(); - private SortedMap<String, Set<String>> question2Answers = new TreeMap<String, Set<String>>(); + private Map<Integer, String> id2Question = new Hashtable<Integer, String>(); + private Map<Integer, String> id2Query = new Hashtable<Integer, String>(); + private SortedMap<Integer, Object> id2Answer = new TreeMap<Integer, Object>(); private SparqlEndpoint endpoint; @@ -64,11 +66,14 @@ Document doc = db.parse(file); doc.getDocumentElement().normalize(); NodeList questionNodes = doc.getElementsByTagName("question"); + int id; String question; String query; Set<String> answers; for(int i = 0; i < questionNodes.getLength(); i++){ Element questionNode = (Element) questionNodes.item(i); + //read question ID + id = Integer.valueOf(questionNode.getAttribute("id")); //Read question question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); //Read SPARQL query @@ -81,7 +86,8 @@ // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); // } - question2query.put(question, query); + id2Question.put(id, question); + id2Query.put(id, query); // question2Answers.put(question, answers); } @@ -98,13 +104,15 @@ } private void loadAnswers(){ + int questionId; String question; - Set<String> answers; - for(Entry<String, String> entry : question2query.entrySet()){ - question = entry.getKey(); + Object answer; + for(Entry<Integer, String> entry : id2Query.entrySet()){ + questionId = entry.getKey(); + question = entry.getValue(); try { - answers = getResources(entry.getValue()); - question2Answers.put(question, 
answers); + answer = getAnswerForSPARQLQuery(question, "uri"); + id2Answer.put(questionId, answer); } catch (Exception e) { e.printStackTrace(); } @@ -124,6 +132,40 @@ return resources; } + private Object getAnswerForSPARQLQuery(String query, String targetVar){ + logger.info("Query: " + query); + Object answer = null; + + if(query.contains("ASK")){ + answer = endpoint.executeAsk(query); + } else if(query.contains("COUNT")){ + + } else { + answer = new HashSet<String>(); + ResultSet rs = endpoint.executeSelect(query); + String variable; + if(rs.getResultVars().size() == 1){ + variable = rs.getResultVars().get(0); + } else { + variable = targetVar; + } + QuerySolution qs; + RDFNode node; + while(rs.hasNext()){ + qs = rs.next(); + node = qs.get(variable); + if(node.isURIResource()){ + ((HashSet)answer).add(node.asResource().getURI()); + } else if(node.isLiteral()){ + ((HashSet)answer).add(node.asLiteral().getLexicalForm()); + } + + } + } + logger.info("Answer: " + answer); + return answer; + } + public void setEndpoint(SparqlEndpoint endpoint){ this.endpoint = endpoint; } @@ -133,32 +175,43 @@ } public void run(){ - int cnt = 0; + SPARQLTemplateBasedLearner stbl = new SPARQLTemplateBasedLearner(); + int failed = 0; + int learnedCnt = 0; + int learnedCorrectlyCnt = 0; + int questionId; String question; - for(Entry<String, Set<String>> entry : question2Answers.entrySet()){ + Object answer; + for(Entry<Integer, String> entry : id2Question.entrySet()){ try { - question = entry.getKey(); - SPARQLTemplateBasedLearner stbl = new SPARQLTemplateBasedLearner(); + questionId = entry.getKey(); + question = entry.getValue(); + answer = id2Answer.get(questionId); + //set the question stbl.setQuestion(question); - stbl.setOracle(this); + //start learning stbl.learnSPARQLQueries(); - List<String> queries = stbl.getCurrentlyBestSPARQLQueries(1); - System.out.println(queries); + String learnedQuery = stbl.getBestSPARQLQuery(); + //get result for best learned query + Object 
learnedAnswer = getAnswerForSPARQLQuery(learnedQuery, "y"); + //compare to answers in target query + if(learnedAnswer.equals(answer)){ + learnedCorrectlyCnt++; + } else { + learnedCnt++; + } } catch (NoTemplateFoundException e) { e.printStackTrace(); - cnt++; + failed++; } catch(Exception e){ e.printStackTrace(); + failed++; } } + logger.info("Could generate SPARQL queries for " + learnedCnt + "/" + id2Question.size() + + " question from which " + learnedCorrectlyCnt + " are the correct answer."); } - @Override - public List<Double> classifyIndividuals(List<Individual> individuals) { - // TODO Auto-generated method stub - return null; - } - /** * @param args */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-06-13 13:18:16
|
Revision: 2866 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2866&view=rev Author: lorenz_b Date: 2011-06-13 13:18:07 +0000 (Mon, 13 Jun 2011) Log Message: ----------- Added some evaluation stuff. Removed unused lib folder. Modified Paths: -------------- trunk/components-ext/failed.txt trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java trunk/components-ext/src/main/resources/log4j.properties trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java trunk/components-ext/successful.txt Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeLemmatizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java trunk/components-ext/src/main/resources/tbsl/evaluation/praeambel.tex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/MultithreadedSPARQLQueryExecutionTest.java Removed Paths: ------------- trunk/components-ext/src/main/resources/tbsl/lib/ Property Changed: ---------------- trunk/components-ext/ Property changes on: trunk/components-ext ___________________________________________________________________ Modified: svn:ignore - components-ext.iml target .classpath .project .settings + components-ext.iml target .classpath .project .settings log cache construct-cache Modified: trunk/components-ext/failed.txt =================================================================== --- trunk/components-ext/failed.txt 2011-06-10 14:37:59 UTC (rev 2865) +++ 
trunk/components-ext/failed.txt 2011-06-13 13:18:07 UTC (rev 2866) @@ -1,10 +1,6 @@ -Which/WDT presidents/NNS were/VBD born/VBN in/IN 1945/CD -List/VB all/DT episodes/NNS of/IN the/DT first/JJ season/NN of/IN the/DT HBO/NNP television/NN series/NN The/DT Sopranos/NNPS -Which/WDT people/NNS have/VBP as/IN their/PRP$ given/VBN name/NN Jimmy/NNP -Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP -Which/WDT companies/NNS work/VBP in/IN the/DT aerospace/NN industry/NN as/RB well/RB as/IN on/IN nuclear/JJ reactor/NN technology/NN -Which/WDT cities/NNS have/VBP more/JJR than/IN 2/CD million/CD inhabitants/NNS -Who/WP has/VBZ been/VBN the/DT 5th/JJ president/NN of/IN the/DT United/NNP States/NNPS of/IN America/NNP -In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/VB -Who/WP wrote/VBD the/DT book/NN The/DT pillars/NNS of/IN the/DT Earth/NNP -Which/WDT organizations/NNS were/VBD founded/VBN in/IN 1950/CD +How/WRB tall/JJ is/VBZ Claudia/NNP Schiffer/NNP +Is/NNP Egypts/NNPS largest/JJS city/NN also/RB its/PRP$ capital/NN +Which/WDT states/VBZ border/VBZ Utah/NNP +In/IN which/WDT films/NNS directed/VBN by/IN Garry/NNP Marshall/NNP was/VBD Julia/NNP Roberts/NNP starring/VBG +Was/VBD US/NNP president/NNP Jackson/NNP involved/VBD in/IN a/DT war/NN +Which/WDT countries/NNS in/IN the/DT European/NNP Union/NNP adopted/VBD the/DT Euro/NNP Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -18,9 +18,12 @@ import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; import 
org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; +import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; +import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -38,6 +41,9 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.vocabulary.FOAF; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -71,9 +77,19 @@ private Oracle oracle; + private Map<String, List<String>> resourcesURICache; + private Map<String, List<String>> classesURICache; + private Map<String, List<String>> propertiesURICache; + private Map<String, List<String>> learnedSPARQLQueries; + private Set<Template> templates; + private Collection<? 
extends Query> sparqlQueryCandidates; + private Map<String, String> prefixMap; + private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); + + public SPARQLTemplateBasedLearner(){ resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); resource_index.setHitsPerPage(TOP_K); @@ -85,6 +101,15 @@ Set<String> predicateFilters = new HashSet<String>(); predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + + prefixMap = new HashMap<String, String>(); + prefixMap.put(RDF.getURI(), "rdf"); + prefixMap.put(RDFS.getURI(), "rdfs"); + prefixMap.put("http://dbpedia.org/ontology/", "dbo"); + prefixMap.put("http://dbpedia.org/property/", "dbp"); + prefixMap.put("http://dbpedia.org/resource/", "dbr"); + prefixMap.put(FOAF.getURI(), "foaf"); + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); templateGenerator = new Templator(); @@ -112,10 +137,13 @@ public void learnSPARQLQueries() throws NoTemplateFoundException{ learnedSPARQLQueries = new HashMap<String, List<String>>(); + resourcesURICache = new HashMap<String, List<String>>(); + classesURICache = new HashMap<String, List<String>>(); + propertiesURICache = new HashMap<String, List<String>>(); //generate SPARQL query templates logger.info("Generating SPARQL query templates..."); mon.start(); - Set<Template> templates = templateGenerator.buildTemplates(question); + templates = templateGenerator.buildTemplates(question); mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); if(templates.isEmpty()){ @@ -127,7 +155,7 @@ } //generate SPARQL query candidates - Collection<? 
extends Query> sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); + sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); //test candidates if(useRemoteEndpointValidation){ //on remote endpoint @@ -138,6 +166,48 @@ } + public List<String> getSPARQLQueries() throws NoTemplateFoundException{ + logger.info("Generating SPARQL query templates..."); + mon.start(); + templates = templateGenerator.buildTemplates(question); + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + if(templates.isEmpty()){ + throw new NoTemplateFoundException(); + } + logger.info("Templates:"); + for(Template t : templates){ + logger.info(t); + } + + //generate SPARQL query candidates + logger.info("Generating SPARQL query candidates..."); + mon.start(); + sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + + List<String> queries = new ArrayList<String>(); + for(Query q : sparqlQueryCandidates){ + queries.add(q.toString()); + } + + return queries; + } + + public Set<Template> getTemplates(){ + return templates; + } + + public List<String> getGeneratedSPARQLQueries(){ + List<String> queries = new ArrayList<String>(); + for(Query q : sparqlQueryCandidates){ + queries.add(q.toString()); + } + + return queries; + } + private Model getWorkingModel(List<String> resources){ logger.info("Generating local model..."); mon.start(); @@ -256,21 +326,37 @@ logger.info("Generating candidate SPARQL queries..."); mon.start(); List<Query> queries = new ArrayList<Query>(); - + List<String> uriCandidates; for(Template template : templates){ queries.add(template.getQuery()); for(Slot slot : template.getSlots()){ List<Query> tmp = new ArrayList<Query>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(String uri : getCandidateURIsSortedBySimilarity(slot)){ + SPARQL_Prefix prefix = null; + uriCandidates = getCandidateURIsSortedBySimilarity(slot); + 
for(String uri : uriCandidates){ + for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ + if(uri.startsWith(uri2prefix.getKey())){ + prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); + uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); + break; + } + } for(Query query : queries){ Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, uri); + if(prefix != null){ + newQuery.addPrefix(prefix); + newQuery.replaceVarWithPrefixedURI(var, uri); + } else { + newQuery.replaceVarWithURI(var, uri); + } + tmp.add(newQuery); } + prefix = null; } - if(!words.isEmpty()){ + if(!words.isEmpty() && !uriCandidates.isEmpty()){ queries.clear(); queries.addAll(tmp); } @@ -304,23 +390,65 @@ } private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ + logger.info("Generating URI candidates for " + slot.getWords() + "..."); + mon.start(); List<String> sortedURIs = new ArrayList<String>(); - + //get the appropriate index based on slot type SolrSearch index = getIndexBySlotType(slot); + //get the appropriate cache for URIs to avoid redundant queries to index + Map<String, List<String>> uriCache = getCacheBySlotType(slot); SortedSet<String> tmp; List<String> uris; - for(String word : slot.getWords()){ + + //prune the word list with lemmatizer only when slot type is not RESOURCE + List<String> words; + if(slot.getSlotType() == SlotType.RESOURCE){ + words = slot.getWords(); + } else { + words = getLemmatizedWords(slot.getWords()); + } + + for(String word : words){ tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = index.getResources("label:\"" + word + "\""); + uris = uriCache.get(word); + if(uris == null){ + uris = index.getResources("label:\"" + word + "\""); + uriCache.put(word, uris); + } tmp.addAll(uris); sortedURIs.addAll(tmp); tmp.clear(); } - + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("URIs: " + sortedURIs); return sortedURIs; } + private List<String> 
getLemmatizedWords(List<String> words){ + logger.info("Pruning word list " + words + "..."); + mon.start(); + List<String> pruned = new ArrayList<String>(); + for(String word : words){ + //currently only stem single words + if(word.contains(" ")){ + pruned.add(word); + } else { + String lemWord = lemmatizer.stem(word); + new LingPipeLemmatizer().stem(word); + if(!pruned.contains(lemWord)){ + pruned.add(lemWord); + } + } + + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Pruned list: " + pruned); + return pruned; + } + class StringSimilarityComparator implements Comparator<String>{ private String s; @@ -330,6 +458,7 @@ @Override public int compare(String s1, String s2) { + double sim1 = Similarity.getSimilarity(s, s1); double sim2 = Similarity.getSimilarity(s, s2); @@ -356,6 +485,18 @@ return index; } + private Map<String, List<String>> getCacheBySlotType(Slot slot){ + Map<String, List<String>> cache = null; + if(slot.getSlotType() == SlotType.CLASS){ + cache = classesURICache; + } else if(slot.getSlotType() == SlotType.PROPERTY){ + cache = propertiesURICache; + } else if(slot.getSlotType() == SlotType.RESOURCE){ + cache = resourcesURICache; + } + return cache; + } + private Map<String, Float> getCandidateURIsWithScore(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); @@ -431,11 +572,17 @@ private List<String> getResultFromRemoteEndpoint(String query){ List<String> resources = new ArrayList<String>(); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - resources.add(qs.get("y").toString()); + try { + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + 
resources.add(qs.get(projectionVar).toString()); + } + } catch (Exception e) { + logger.error("Query execution failed.", e); } return resources; } @@ -459,7 +606,10 @@ * @throws NoTemplateFoundException */ public static void main(String[] args) throws MalformedURLException, NoTemplateFoundException { - String question = "Give me all soccer clubs in Premier League"; +// Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); +// Logger.getLogger(HttpClient.class).setLevel(Level.OFF); +// Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); + String question = "Who are the presidents of the United States?"; // String question = "Give me all films starring Brad Pitt"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), @@ -482,7 +632,11 @@ @Override public String getBestSPARQLQuery() { - return learnedSPARQLQueries.keySet().iterator().next(); + if(!learnedSPARQLQueries.isEmpty()){ + return learnedSPARQLQueries.keySet().iterator().next(); + } else { + return null; + } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeLemmatizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeLemmatizer.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeLemmatizer.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.ArrayList; +import java.util.List; + +import com.aliasi.tokenizer.PorterStemmerTokenizerFactory; + +public class LingPipeLemmatizer implements Lemmatizer { + + @Override + public String stem(String word) { + return PorterStemmerTokenizerFactory.stem(word); + } + + @Override + public String stem(String word, String tag) { + return PorterStemmerTokenizerFactory.stem(word); + } + + @Override + public List<String> 
stem(List<String> words) { + List<String> stemmedWords = new ArrayList<String>(); + for(String word : words){ + stemmedWords.add(stem(word)); + } + return stemmedWords; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeLemmatizer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -355,6 +355,37 @@ } } + + public void replaceVarWithPrefixedURI(String var, String uri){ + SPARQL_Value subject; + SPARQL_Value property; + SPARQL_Value object; + + for(SPARQL_Triple triple : conditions){ + subject = triple.getVariable(); + property = triple.getProperty(); + object = triple.getValue(); + if(subject.isVariable()){ + if(subject.getName().equals(var)){ + subject.setName(uri); + subject.setIsVariable(false); + } + } + if(property.isVariable()){ + if(property.getName().equals(var)){ + property.setName(uri); + property.setIsVariable(false); + } + } + if(object.isVariable()){ + if(object.getName().equals(var)){ + object.setName(uri); + object.setIsVariable(false); + } + } + + } + } @Override public int hashCode() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -8,7 +8,7 @@ public class WordNet { - public 
String path; + public String path = "tbsl/dict/"; public WordNetDatabase database; public WordNet(String s) { @@ -16,7 +16,7 @@ } public WordNet() { - path = "src/main/resources/tbsl/dict/"; + path = this.getClass().getClassLoader().getResource(path).getPath(); } public void setWordNetPath(String s) { Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -0,0 +1,112 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.Scanner; + +public class LatexWriter { + + private static String NL = System.getProperty("line.separator"); + private static final String PRAEAMBEL_FILE = "tbsl/evaluation/praeambel.tex"; + private StringBuilder sb; + + public LatexWriter() { + sb = new StringBuilder(); + + loadPraeambel(); + } + + private void loadPraeambel(){ + try { + Scanner scanner = new Scanner(new FileInputStream(this.getClass().getClassLoader().getResource(PRAEAMBEL_FILE).getPath())); + try { + while (scanner.hasNextLine()){ + sb.append(scanner.nextLine() + NL); + } + } + finally{ + scanner.close(); + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + public void makeTitle(){ + sb.append("\\maketitle\n"); + } + + public void buildTableOfContents(){ + sb.append("\\tableofcontents\n"); + sb.append("\\newpage\n"); + } + + public void beginDocument(){ + sb.append("\\begin{document}\n"); + makeTitle(); + buildTableOfContents(); + } + + public void endDocument(){ + sb.append("\\end{document}"); + } + + public void beginSection(String 
title){ + sb.append("\\section{").append(title).append("}\n"); + } + + public void beginSubsection(String title){ + sb.append("\\subsection*{").append(title).append("}\n"); + sb.append("\\addcontentsline{toc}{subsection}{").append(title).append("}\n"); + } + + public void beginSubSubsection(String title){ + sb.append("\\subsubsection*{").append(title).append("}\n"); + sb.append("\\addcontentsline{toc}{subsubsection}{").append(title).append("}\n"); + } + + public void beginEnumeration(){ + sb.append("\\begin{enumerate}\n"); + } + + public void endEnumeration(){ + sb.append("\\end{enumerate}\n"); + } + + public void beginEnumerationItem(){ + sb.append("\\item{\n"); + } + + public void endEnumerationItem(){ + sb.append("}\n"); + } + + public void addListing(String listing){ + sb.append("\\begin{lstlisting}[language=SPARQL, basicstyle=\\scriptsize, showstringspaces=false]\n"); + sb.append(listing).append("\n"); + sb.append("\\end{lstlisting}\n"); + } + + public void addText(String text){ + sb.append(text).append("\n"); + } + + public void write(String file){ + try { + Writer output = new BufferedWriter(new FileWriter(file)); + try { + output.write( sb.toString() ); + } + finally { + output.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/resources/log4j.properties =================================================================== --- trunk/components-ext/src/main/resources/log4j.properties 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/main/resources/log4j.properties 2011-06-13 13:18:07 UTC (rev 2866) @@ -25,7 +25,7 @@ # appenders are the output-targets defined above # loglevels: trace, debug, info, warn, error, fatal # -log4j.rootLogger=debug, stdout +log4j.rootLogger=INFO, 
stdout #, file @@ -42,6 +42,8 @@ ####SOLR log4j.category.org.apache.solr.level = OFF +log4j.logger.org.apache.http=OFF +log4j.logger.org.apache.http.wire=OFF Added: trunk/components-ext/src/main/resources/tbsl/evaluation/praeambel.tex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/praeambel.tex (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/praeambel.tex 2011-06-13 13:18:07 UTC (rev 2866) @@ -0,0 +1,44 @@ +\documentclass[a4paper,10pt]{article} +\usepackage{lmodern} +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +% \usepackage{ngerman} +% \usepackage{picins} + +% \usepackage{beamerthemetree} + +\usepackage{color} +\usepackage{colortbl} +\usepackage{rotating,makecell} +\usepackage[hypertex]{hyperref} + +\usepackage{listings} + +\usepackage{graphicx} +%\usepackage{verbatim} +\usepackage{moreverb} + + +\lstdefinelanguage{SPARQL} +{ + morestring=[b]", +% morestring=[s]{>}{<}, + morecomment=[s]{<?}{?>}, + stringstyle=\color{black}, + identifierstyle=\color{black}, + keywordstyle=\color{blue}, + morekeywords={OPTIONAL, SELECT, DISTINCT, WHERE, FILTER, BOUND, UNION, PREFIX} +} + +\lstset{ + emph={OPTIONAL, SELECT, BOUND, DISTINCT, WHERE, FILTER, BOUND, UNION, PREFIX}, emphstyle=\color{blue}, + emph={[2]some,only,min,max,and,or,that,not}, emphstyle={[2]\color{red}}, + aboveskip={0 em}, + belowskip={0 em}, + literate={ö}{{\"o}}1 + {ä}{{\"a}}1 + {ü}{{\"u}}1 +} + +%opening +\title{Evaluation} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -122,7 +122,7 @@ int proCnt = 
Runtime.getRuntime().availableProcessors(); logger.info("Number of processor: " + proCnt); Future<Model>[] ret = new Future[proCnt]; - List<String> queries = queries = createSearchQueries("Hamburg", "Vienna", "Stuttgart", "Frankfurt", "Kiel");;//createQueries(resource, proCnt); + List<String> queries = createSearchQueries("Hamburg", "Vienna", "Stuttgart", "Frankfurt", "Kiel");;//createQueries(resource, proCnt); ExecutorService es = Executors.newFixedThreadPool(proCnt); for(int i = 0; i < 5; i++){ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -1,11 +1,15 @@ package org.dllearner.algorithm.tbsl; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileWriter; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URLDecoder; +import java.util.Collection; import java.util.HashSet; -import java.util.Hashtable; import java.util.List; -import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; @@ -18,11 +22,15 @@ import org.aksw.commons.sparql.core.SparqlEndpoint; import org.aksw.commons.sparql.core.decorator.CachingSparqlEndpoint; import org.aksw.commons.sparql.core.impl.HttpSparqlEndpoint; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Layout; +import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; -import org.dllearner.core.Oracle; -import org.dllearner.core.owl.Individual; +import 
org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.util.LatexWriter; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -39,8 +47,8 @@ private File evaluationFile; - private Map<Integer, String> id2Question = new Hashtable<Integer, String>(); - private Map<Integer, String> id2Query = new Hashtable<Integer, String>(); + private SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); + private SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); private SortedMap<Integer, Object> id2Answer = new TreeMap<Integer, Object>(); private SparqlEndpoint endpoint; @@ -133,14 +141,12 @@ } private Object getAnswerForSPARQLQuery(String query, String targetVar){ - logger.info("Query: " + query); + logger.debug("Query: " + query); Object answer = null; if(query.contains("ASK")){ answer = endpoint.executeAsk(query); - } else if(query.contains("COUNT")){ - - } else { + } else { answer = new HashSet<String>(); ResultSet rs = endpoint.executeSelect(query); String variable; @@ -162,7 +168,7 @@ } } - logger.info("Answer: " + answer); + logger.debug("Answer: " + answer); return answer; } @@ -174,48 +180,263 @@ stbl.setUseRemoteEndpointValidation(useRemoteValidation); } + public void run(){ - SPARQLTemplateBasedLearner stbl = new SPARQLTemplateBasedLearner(); - int failed = 0; - int learnedCnt = 0; - int learnedCorrectlyCnt = 0; + int topN2Print = 25; + + int questionId; String question; + String query; Object answer; + LatexWriter latex = new LatexWriter(); + latex.beginDocument(); + int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ + if(i++ == 1)break; try { questionId = entry.getKey(); question = entry.getValue(); + query = id2Query.get(questionId); answer = id2Answer.get(questionId); + logger.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); + logger.info("QUESTION: " + question + "\n"); + logger.info("TARGET QUERY:\n" + 
query + "\n"); + + + //write new section for query + latex.beginSection(question); + //write subsection for target + latex.beginSubsection("Target"); + //write subsubsection for target query + latex.beginSubSubsection("Query"); + latex.addListing(query); + //write subsubsection for target result + latex.beginSubSubsection("Result" + ((answer instanceof Collection<?>) ? "(" + ((Collection)answer).size()+")" : "")); + latex.addText(escapeAnswerString(answer)); + //set the question stbl.setQuestion(question); //start learning stbl.learnSPARQLQueries(); + //get the best learned query String learnedQuery = stbl.getBestSPARQLQuery(); - //get result for best learned query - Object learnedAnswer = getAnswerForSPARQLQuery(learnedQuery, "y"); - //compare to answers in target query - if(learnedAnswer.equals(answer)){ - learnedCorrectlyCnt++; - } else { - learnedCnt++; + //get result for best learned query if exists + Object learnedAnswer = null; + if(learnedQuery != null){ + learnedAnswer = getAnswerForSPARQLQuery(learnedQuery, "y"); } + //get the generated SPARQL query candidates + List<String> queries = stbl.getGeneratedSPARQLQueries(); + //get the used templates + Set<Template> templates = stbl.getTemplates(); + + //start output + //write templates subsection + latex.beginSubsection("Templates (" + templates.size() + ")"); + latex.beginEnumeration(); + for(Template t : templates){ + latex.beginEnumerationItem(); + latex.addListing(t.toString()); + latex.endEnumerationItem(); + } + latex.endEnumeration(); + + //write generated queries subsection + latex.beginSubsection("Top " + topN2Print + " generated queries (max. 
" + queries.size() + ")"); + logger.info("LEARNED QUERIES(#" + queries.size() + "):\n"); + int cnt = 1; + if(!queries.isEmpty()){ + latex.beginEnumeration(); + } + //print queries to log file + for(String q : queries){ + logger.info("QUERY " + cnt++ + ":\n" + q + "\n"); + logger.info("--------"); + } + //print top n queries to latex file + int max = Math.min(topN2Print, queries.size()); + for(int j = 0; j < max; j++){ + latex.beginEnumerationItem(); + latex.addListing(queries.get(j)); + latex.endEnumerationItem(); + } + if(!queries.isEmpty()){ + latex.endEnumeration(); + } + + //write solution subsection if exists + if(learnedQuery != null){ + latex.beginSubsection("Solution"); + latex.beginSubSubsection("Query"); + latex.addListing(learnedQuery); + latex.beginSubSubsection("Result" + ((learnedAnswer instanceof Collection<?>) ? "(" + ((Collection)learnedAnswer).size()+")" : "")); + latex.addText(escapeAnswerString(learnedAnswer, answer)); + } + + } catch (NoTemplateFoundException e) { e.printStackTrace(); - failed++; + logger.error("Template generation failed"); } catch(Exception e){ e.printStackTrace(); - failed++; + logger.error("ERROR"); } } - logger.info("Could generate SPARQL queries for " + learnedCnt + "/" + id2Question.size() - + " question from which " + learnedCorrectlyCnt + " are the correct answer."); + latex.endDocument(); + latex.write("log/evaluation.tex"); } + + public void run_without_testing_answer(){ + int topN2Print = 25; + + + int questionId; + String question; + String query; + Object answer; + LatexWriter latex = new LatexWriter(); + latex.beginDocument(); + int i = 0; + for(Entry<Integer, String> entry : id2Question.entrySet()){ + if(i++ == 1)break; + try { + questionId = entry.getKey(); + question = entry.getValue(); + query = id2Query.get(questionId); + answer = id2Answer.get(questionId); + logger.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); + logger.info("QUESTION: " + question + "\n"); + 
logger.info("TARGET QUERY:\n" + query + "\n"); + + + //write new section for query + latex.beginSection(question); + //write subsection for target + latex.beginSubsection("Target"); + //write subsubsection for target query + latex.beginSubSubsection("Query"); + latex.addListing(query); + //write subsubsection for target result + latex.beginSubSubsection("Result" + ((answer instanceof Collection<?>) ? "(" + ((Collection)answer).size()+")" : "")); + latex.addText(escapeAnswerString(answer)); + + //set the question + stbl.setQuestion(question); + //get the generated SPARQL query candidates + List<String> queries = stbl.getSPARQLQueries(); + //get the used templates + Set<Template> templates = stbl.getTemplates(); + + //start output + //write templates subsection + latex.beginSubsection("Templates (" + templates.size() + ")"); + latex.beginEnumeration(); + for(Template t : templates){ + latex.beginEnumerationItem(); + latex.addListing(t.toString()); + latex.endEnumerationItem(); + } + latex.endEnumeration(); + + //write generated queries subsection + latex.beginSubsection("Top " + topN2Print + " generated queries (max. 
" + queries.size() + ")"); + logger.info("LEARNED QUERIES(#" + queries.size() + "):\n"); + int cnt = 1; + if(!queries.isEmpty()){ + latex.beginEnumeration(); + } + //print queries to log file + for(String q : queries){ + logger.info("QUERY " + cnt++ + ":\n" + q + "\n"); + logger.info("--------"); + } + //print top n queries to latex file + int max = Math.min(topN2Print, queries.size()); + for(int j = 0; j < max; j++){ + latex.beginEnumerationItem(); + latex.addListing(queries.get(j)); + latex.endEnumerationItem(); + } + if(!queries.isEmpty()){ + latex.endEnumeration(); + } + + + } catch (NoTemplateFoundException e) { + e.printStackTrace(); + logger.error("Template generation failed"); + } catch(Exception e){ + e.printStackTrace(); + logger.error("ERROR"); + } + } + latex.endDocument(); + latex.write("log/evaluation.tex"); + } + private String escapeAnswerString(Object learnedAnswer, Object targetAnswer){ + if(learnedAnswer instanceof Collection<?>){ + Collection<?> target = (Collection<?>) targetAnswer; + StringBuilder sb = new StringBuilder(); + try { + int i = 1; + for(String s : (Collection<String>)learnedAnswer){ + if(target.contains(s)){ + s = "\\textcolor{green}{" + s + "}"; + } + sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); + if(i % 2 == 0){ + sb.append("\n"); + } + i++; + } + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + return sb.toString(); + } else { + return learnedAnswer.toString(); + } + + } + + private String escapeAnswerString(Object learnedAnswer){ + if(learnedAnswer instanceof Collection<?>){ + StringBuilder sb = new StringBuilder(); + try { + int i = 1; + for(String s : (Collection<String>)learnedAnswer){ + sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); + if(i % 2 == 0){ + sb.append("\n"); + } + i++; + } + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + 
return sb.toString(); + } else { + return learnedAnswer.toString(); + } + + } /** * @param args + * @throws IOException */ - public static void main(String[] args) { + public static void main(String[] args) throws IOException { + Logger.getLogger(SPARQLTemplateBasedLearner.class).setLevel(Level.OFF); + Logger.getLogger(Evaluation.class).setLevel(Level.INFO); + Logger.getRootLogger().removeAllAppenders(); + Layout layout = new PatternLayout("%m%n"); + FileAppender fileAppender = new FileAppender( + layout, "log/evaluation.log", false); + fileAppender.setThreshold(Level.INFO); + Logger.getRootLogger().addAppender(fileAppender); + + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); SparqlEndpoint endpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint("http://live.dbpedia.org/sparql/", "http://dbpedia.org/sparql"), "cache"); Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/MultithreadedSPARQLQueryExecutionTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/MultithreadedSPARQLQueryExecutionTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/MultithreadedSPARQLQueryExecutionTest.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -0,0 +1,103 @@ +package org.dllearner.algorithm.tbsl; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.dllearner.algorithm.tbsl.util.MultithreadedSPARQLQueryExecutor; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.w3c.dom.DOMException; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import com.hp.hpl.jena.query.ResultSet; +import 
com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; + +public class MultithreadedSPARQLQueryExecutionTest { + + /** + * @param args + */ + public static void main(String[] args) { + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + + List<String> queries = new ArrayList<String>(); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(file); + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + String query; + for (int i = 0; i < questionNodes.getLength(); i++) { + Element questionNode = (Element) questionNodes.item(i); + // Read SPARQL query + query = ((Element) questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0) + .getNodeValue().trim(); + queries.add(query); + } + } catch (DOMException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + int threadCount = 5; + MultithreadedSPARQLQueryExecutor executor = new MultithreadedSPARQLQueryExecutor(endpoint, threadCount); + List<List<String>> lists = splitList(queries, threadCount); + long startTime = System.currentTimeMillis(); + for(List<String> list : lists){ + executor.executeQueries(list); + } + System.out.format("Multithreaded needed %d ms.\n", System.currentTimeMillis()-startTime); + executor.close(); + + + startTime = System.currentTimeMillis(); + for(String query : queries){ + executeSPARQLQuery(endpoint, query); + } + System.out.format("Sequentially needed %d ms.", System.currentTimeMillis()-startTime); + } + + public static <T> List<List<T>> splitList(List<T> list, int splitSize){ + List<List<T>> lists = new ArrayList<List<T>>(); + int partitionCount = list.size()/splitSize + 1; + for(int 
partition = 0; partition < partitionCount; partition++){ + int start = partition * splitSize; + int end = Math.min(start + splitSize, list.size()); + lists.add(list.subList(start, end)); + } + return lists; + } + + public static void executeSPARQLQuery(SparqlEndpoint endpoint, String query){ + QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for (String dgu : endpoint.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : endpoint.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + + ResultSet rs = null; + if(query.contains("SELECT")){ + rs = queryExecution.execSelect(); + } else if(query.contains("ASK")){ + queryExecution.execAsk(); + } + } + +} Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/MultithreadedSPARQLQueryExecutionTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -5,6 +5,7 @@ import org.annolab.tt4j.TreeTaggerException; import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.LingPipePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.TreeTagger; @@ -13,17 +14,24 @@ public static void main(String[] args) throws IOException, ClassNotFoundException, TreeTaggerException { - String sentence = "Which rivers does the Brooklyn Bridge cross ?"; + String sentence = "When did Nirvana record Nevermind?"; +// 
String sentence = "Which rivers does the Brooklyn Bridge cross?"; + + //Stanford PartOfSpeechTagger tagger = new StanfordPartOfSpeechTagger(); long startTime = System.currentTimeMillis(); String tagged = tagger.tag(sentence); System.out.format("Tagged sentence with Stanford tagger (%d ms):\n", System.currentTimeMillis()-startTime); System.out.println(tagged + "\n"); - TreeTagger tt = new TreeTagger(); - tt.tag(sentence); +// //TreeTagger +// TreeTagger tt = new TreeTagger(); +// tt.tag(sentence); + + + //Apache OpenNLP tagger = new ApachePartOfSpeechTagger(); startTime = System.currentTimeMillis(); tagged = tagger.tag(sentence); @@ -31,13 +39,30 @@ startTime = System.currentTimeMillis(); System.out.println(tagged + "\n"); - + //Apache OpenNLP Top k startTime = System.currentTimeMillis(); List<String> topKTaggedSentences = tagger.tagTopK(sentence); System.out.format("Top k tags with Apache OpenNLP (%d ms):\n", System.currentTimeMillis()-startTime); for(String t : topKTaggedSentences){ System.out.println(t); } + + + //LingPipe + tagger = new LingPipePartOfSpeechTagger(); + startTime = System.currentTimeMillis(); + tagged = tagger.tag(sentence); + System.out.format("\nTagged sentence with LingPipe API (%d ms):\n", System.currentTimeMillis()-startTime); + startTime = System.currentTimeMillis(); + System.out.println(tagged + "\n"); + + //LingPipe Top k + startTime = System.currentTimeMillis(); + topKTaggedSentences = tagger.tagTopK(sentence); + System.out.format("Top k tags with LingPipe API (%d ms):\n", System.currentTimeMillis()-startTime); + for(String t : topKTaggedSentences){ + System.out.println(t); + } } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-06-10 14:37:59 UTC (rev 2865) +++ 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-06-13 13:18:07 UTC (rev 2866) @@ -60,7 +60,7 @@ */ public static void main(String[] args) { - File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml"); + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-test-questions-tagged(ideal).xml"); List<String> questions = readQuestions(file); StringBuilder successful = new StringBuilder(); Modified: trunk/components-ext/successful.txt =================================================================== --- trunk/components-ext/successful.txt 2011-06-10 14:37:59 UTC (rev 2865) +++ trunk/components-ext/successful.txt 2011-06-13 13:18:07 UTC (rev 2866) @@ -1,177 +1,396 @@ ***************************************************************** -Give/VB me/PRP all/DT school/NN types/NNS +Which/WDT presidents/NNS of/IN the/DT United/NNP States/NNPS had/VBD more/JJR than/IN three/CD children/NNS >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x ?p6 ?v1 . + ?x ?p5 ?y . + ?v0 rdf:type ?p4 . + ?x rdf:type ?p3 . + FILTER(?c > 3) . +} + +>> SLOTS: +v1: RESOURCE {United States} +p3: CLASS {presidents,president} +p4: CLASS {children,child,kid,youngster,minor} +p5: PROPERTY {had} +p6: PROPERTY {} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x ?p2 ?y . + ?v1 ?p0 ?x . + ?v0 rdf:type ?p1 . + FILTER(?c > 3) . 
+} + +>> SLOTS: +v1: RESOURCE {United States} +p0: PROPERTY {presidents,president} +p1: CLASS {children,child,kid,youngster,minor} +p2: PROPERTY {had} +***************************************************************** +Give/VB me/PRP the/DT official/JJ websites/NNS of/IN actors/NNS of/IN the/DT television/NN show/NN Charmed/VBN +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + SELECT ?y WHERE { - ?y rdf:type ?p0 . + ?y ?p5 ?j . + ?v3 ?p4 ?v0 . + ?v3 ?p8 ?y . + ?v3 rdf:type ?p7 . + ?v0 rdf:type ?p6 . } >> SLOTS: -p0: CLASS {school types} +v0: RESOURCE {Charmed} +p4: PROPERTY {} +p5: PROPERTY {official} +p6: CLASS {television show} +p7: CLASS {actors,actor,histrion,player,thespian} +p8: PROPERTY {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p9 ?j . + ?v0 rdf:type ?p11 . + ?v0 ?p12 ?v3 . + ?y rdf:type ?p13 . + ?y ?p10 ?v3 . +} + +>> SLOTS: +v0: RESOURCE {Charmed} +p9: PROPERTY {official} +p10: PROPERTY {} +p11: CLASS {television show} +p12: PROPERTY {actors,actor,histrion,player,thespian} +p13: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?v0 rdf:type ?p16 . + ?y rdf:type ?p18 . + ?y ?p15 ?j . + ?v3 rdf:type ?p17 . + ?v3 ?p14 ?v0 . + ?y ?p14 ?v3 . +} + +>> SLOTS: +v0: RESOURCE {Charmed} +p14: PROPERTY {} +p15: PROPERTY {official} +p16: CLASS {television show} +p17: CLASS {actors,actor,histrion,player,thespian} +p18: CLASS {websites,website,site} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?y ?p0 ?j . + ?v0 ?p2 ?v3 . + ?v3 ?p3 ?y . + ?v0 rdf:type ?p1 . 
+} + +>> SLOTS: +v0: RESOURCE {Charmed} +p0: PROPERTY {official} +p1: CLASS {television show} +p2: PROPERTY {actors,actor,histrion,player,thespian} +p3: PROPERTY {websites,website,site} ***************************************************************** -Who/WP are/VBP the/DT presidents/NNS of/IN the/DT United/NNP States/NNPS +Who/WP is/VBZ the/DT daughter/NN of/IN Bill/NNP Clinton/NNP married/VBN to/TO >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?v0 ?p0 ?y . + ?v0 ?p1 ?x . + ?x ?p0 ?y . } >> SLOTS: -v0: RESOURCE {United States} -p0: PROPERTY {presidents,president} +v0: RESOURCE {Bill Clinton} +p0: PROPERTY {married} +p1: PROPERTY {daughter,girl} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p2 ?v0 . + ?x ?p4 ?v0 . + ?x rdf:type ?p2 . + ?x ?p3 ?y . } >> SLOTS: -v0: RESOURCE {United States} -p1: CLASS {presidents,president} -p2: PROPERTY {} +v0: RESOURCE {Bill Clinton} +p2: CLASS {daughter,girl} +p3: PROPERTY {married} +p4: PROPERTY {} ***************************************************************** -Who/WP was/VBD the/DT wife/NN of/IN President/NNP Lincoln/NNP +Which/WDT river/NN does/VBZ the/DT Brooklyn/NNP Bridge/NNP cross/VB >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p2 ?v0 . + ?v0 ?p3 ?y . + ?v0 rdf:type ?p2 . } >> SLOTS: -v0: RESOURCE {President Lincoln} -p1: CLASS {wife} +v0: RESOURCE {Brooklyn Bridge} +p2: CLASS {river} +p3: PROPERTY {cross} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?x ?p1 ?y . + ?y rdf:type ?p0 . 
+} + +>> SLOTS: +x: RESOURCE {Brooklyn Bridge} +p0: CLASS {river} +p1: PROPERTY {cross} +***************************************************************** +How/WRB many/JJ monarchical/JJ countries/NNS are/VBP there/RB in/IN Europe/NNP +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +ASK WHERE { + ?y ?p1 ?j . + ?y rdf:type ?p0 . + ?y ?p2 ?v1 . + FILTER(?y == ?y) . +} + +>> SLOTS: +p0: CLASS {countries,state,nation,country,land} +p1: PROPERTY {monarchical} p2: PROPERTY {} +***************************************************************** +Where/WRB did/VBD Abraham/NNP Lincoln/NNP die/VB >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { + ?x ?p0 ?y . +} + +>> SLOTS: +x: RESOURCE {Abraham Lincoln} +p0: PROPERTY {diePlace} +***************************************************************** +Is/VBZ the/DT wife/NN of/IN President/NNP Obama/NNP called/VBD Michelle/NNP +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +ASK WHERE { ?v0 ?p0 ?y . + ?y ?p1 'michelle' . } >> SLOTS: -v0: RESOURCE {President Lincoln} +v0: RESOURCE {President Obama} p0: PROPERTY {wife} +p1: PROPERTY {title,name} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +ASK WHERE { + ?y rdf:type ?p2 . + ?y ?p4 'michelle' . + ?y ?p3 ?v0 . 
+} + +>> SLOTS: +v0: RESOURCE {President Obama} +p2: CLASS {wife} +p3: PROPERTY {} +p4: PROPERTY {title,name} ***************************************************************** -Who/WP developed/VBD the/DT video/NN game/NN World/NN of/IN Warcraft/NNP +Which/WDT states/NNS of/IN Germany/NNP are/VBP governed/VBN by/IN the/DT Social/NNP Democratic/NNP Party/NNP >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?x WHERE { - ?v1 rdf:type ?p6 . - ?x ?p4 ?v1 . - ?v1 rdf:type ?p3 . - ?v1 ?p5 ?v0 . + ?v0 ?p1 ?x . + ?y ?p0 ?x . } >> SLOTS: -v0: RESOURCE {Warcraft} -p3: CLASS {video game} -p4: PROPERTY {developed} -p5: PROPERTY {} -p6: CLASS {World,universe,existence,creation,world} +y: RESOURCE {Social Democratic Party} +v0: RESOURCE {Germany} +p0: PROPERTY {governed} +p1: PROPERTY {states,state,province} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?x WHERE { - ?x ?p1 ?v1 . - ?v1 rdf:type ?p0 . - ?v0 ?p2 ?v1 . + ?x ?p4 ?v0 . + ?x rdf:type ?p3 . + ?y ?p2 ?x . } >> SLOTS: -v0: RESOURCE {Warcraft} -p0: CLASS {video game} -p1: PROPERTY {developed} -p2: PROPERTY {World,universe,existence,creation,world} +y: RESOURCE {Social Democratic Party} +v0: RESOURCE {Germany} +p2: PROPERTY {governed} +p3: CLASS {states,state,province} +p4: PROPERTY {} ***************************************************************** -What/WP is/VBZ the/DT official/JJ website/NN of/IN Tom/NNP Hanks/NNP +Which/WDT US/NNP states/NNS possess/VBP gold/NN minerals/NNS >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +SELECT ?x WHERE { + ?x rdf:type ?p2 . + ?x ?p1 ?j . + ?y rdf:type ?p3 . + ?x ?p0 ?y . 
+} + +>> SLOTS: +p0: PROPERTY {possess} +p1: PROPERTY {US} +p2: CLASS {states,state,province} +p3: CLASS {gold minerals} +***************************************************************** +In/IN which/WDT country/NN does/VBZ the/DT Nile/NNP start/VB +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + SELECT ?y WHERE { - ?y rdf:type ?p2 . - ?y ?p4 ?j . - ?y ?p3 ?v1 . + ?v0 ?p3 ?y . + ?v0 rdf:type ?p2 . } >> SLOTS: -v1: RESOURCE {Tom Hanks} -p2: CLASS {website,site} -p3: PROPERTY {} -p4: PROPERTY {official} +v0: RESOURCE {Nile} +p2: CLASS {country,state,nation,land,commonwealth} +p3: PROPERTY {start} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?v1 ?p1 ?y . - ?y ?p0 ?j . + ?y rdf:type ?p1 . + ?x ?p0 ?y . } >> SLOTS: -v1: RESOURCE {Tom Hanks} -p0: PROPERTY {official} -p1: PROPERTY {website,site} +x: RESOURCE {Nile} +p0: PROPERTY {start} +p1: CLASS {country,state,nation,land,commonwealth} ***************************************************************** -Who/WP produced/VBD the/DT most/JJS films/NNS +Which/WDT locations/NNS have/VBP more/JJR than/IN two/CD caves/NNS >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT COUNT(?y) AS c2 WHERE { - ?x ?p1 ?y . +SELECT ?x COUNT(?v0) AS ?c WHERE { + ?x rdf:type ?p2 . + ?x ?p0 ?y . + ?v0 rdf:type ?p1 . + FILTER(?c > 2) . +} + +>> SLOTS: +p0: PROPERTY {have} +p1: CLASS {caves,cave} +p2: CLASS {locations,location} +***************************************************************** +Is/VBZ proinsulin/NNP a/DT protein/NN +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +ASK WHERE { ?y rdf:type ?p0 . + FILTER(?y == ?y) . 
} -ORDER BY DESC(?c2) -LIMIT 1 OFFSET 0 >> SLOTS: -p0: CLASS {films,movie,film,picture,pic} -p1: PROPERTY {produced} +y: RESOURCE {proinsulin} +p0: CLASS {protein} ***************************************************************** -Which/WDT mountains/NNS are/VBP higher/JJR than/IN the/DT Nanga/NNP Parbat/NNP +Which/WDT classis/NN does/VBZ the/DT Millepede/NNP belong/VBP to/TO >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y ?p1 ?j . - ?v0 ?p1 ?i . + ?v0 ?p3 ?y . + ?v0 rdf:type ?p2 . +} + +>> SLOTS: +v0: RESOURCE {Millepede} +p2: CLASS {classis} +p3: PROPERTY {belong} +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?y WHERE { + ?x ?p1 ?y . ?y rdf:type ?p0 . - FILTER(?j > ?i) . } >> SLOTS: -v0: RESOURCE {Nanga Parbat} -p0: CLASS {mountains,mountain,mount} -p1: PROPERTY {higherdegree} +x: RESOURCE {Millepede} +p0: CLASS {classis} +p1: PROPERTY {belong} ***************************************************************** -Who/WP created/VBD English/NNP Wikipedia/NNP +Who/WP created/VBD Goofy/NNP >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -182,79 +401,76 @@ } >> SLOTS: -y: RESOURCE {English Wikipedia} +y: RESOURCE {Goofy} p0: PROPERTY {created} ***************************************************************** -Give/VB me/PRP all/DT actors/NNS starring/VBG in/IN Batman/NNP Begins/NNPS +Give/VB me/PRP the/DT capitals/NNS of/IN all/DT US/NNP states/NNS >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y WHERE { - ?y rdf:type ?p1 . - ?y ?p0 ?v1 . + ?v0 rdf:type ?p3 . + ?v0 ?p5 ?j . + ?y ?p6 ?v0 . + ?y rdf:type ?p4 . 
} >> SLOTS: -v1: RESOURCE {Batman Begins} -p0: PROPERTY {starring} -p1: CLASS {actors,actor,histrion,player,thespian} -***************************************************************** -Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP +p3: CLASS {states,state,province} +p4: CLASS {capitals,capital} +p5: PROPERTY {US} +p6: PROPERTY {} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?x WHERE { - ?y ?p0 ?v1 . - ?x rdf:type ?p2 . - ?y ?p1 ?x . - ?y rdf:type ?p3 . +SELECT ?y WHERE { + ?y ?p13 ?v0 . + ?v0 rdf:type ?p11 . + ?y rdf:type ?p10 . + ?v0 ?p12 ?j . } >> SLOTS: -v1: RESOURCE {California} -p0: PROPERTY {founded} -p1: PROPERTY {developed} -p2: CLASS {software,package} -p3: CLASS {organizations,organization,organisation} -***************************************************************** -Is/VBZ Christian/NNP Bale/NNP starring/VBG in/IN Batman/NNP Begins/NNPS +p10: CLASS {capitals,capital} +p11: CLASS {states,state,province} +p12: PROPERTY {US} +p13: PROPERTY {} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -ASK WHERE { - ?y ?p0 ?v0 . +SELECT ?y WHERE { + ?v0 ?p7 ?y . + ?v0 rdf:type ?p8 . + ?v0 ?p9 ?j . } >> SLOTS: -y: RESOURCE {Christian Bale} -v0: RESOURCE {Batman Begins} -p0: PROPERTY {starring} -***************************************************************** -Give/VB me/PRP the/DT websites/NNS of/IN companies/NNS with/IN more/JJR than/IN 500000/CD employees/NNS +p7: PROPERTY {capitals,capital} +p8: CLASS {states,state,province} +p9: PROPERTY {US} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -SELECT ?y COUNT(?v0) AS ?c WHERE { - ?v0 rdf:type ?p9 . - ?v1 ?p10 ?y . - ?v1 rdf:type ?p12 . - ?v1 ?p11 ?v2 . - FILTER(?c > 500000) . +SELECT ?y WHERE { + ?v0 rdf:type ?p0 . 
+ ?v0 ?p1 ?y . + ?v0 ?p2 ?j . } >> SLOTS: -p9: CLASS {employees,employee} -p10: PROPERTY {websites,website,site} -p11: PROPERTY {} -p12: CLASS {companies,company} +p0: CLASS {states,state,province} +p1: PROPERTY {capitals,capital} +p2: PROPERTY {US} +***************************************************************** +Give/VB me/PRP all/DT cities/NNS in/IN New/NNP Jersey/NNP with/IN more/JJR than/IN 100000/CD inhabitants/NNS >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> @@ -262,577 +478,753 @@ SELECT ?y COUNT(?v0) AS ?c WHERE { ?y ?p6 ?v3 . - ?y ?p4 ?v1 . - ?y rdf:type ?p8 . + ?v3 ?p4 ?v2 . ?v0 rdf:type ?p5 . - ?v3 rdf:type ?p7 . - FILTER(?c > 500000) . + ?y rdf:type ?p7 . + FILTER(?c > 100000) . } >> SLOTS: +v3: RESOURCE {New Jersey} p4: PROPERTY {} -p5: CLASS {employees,employee} +p5: CLASS {inhabitants,inhabitant,habitant,dweller,denizen} p6: PROPERTY {} -p7: CLASS {companies,company} -p8: CLASS {websites,website,site} +p7: CLASS {cities,city,metropolis} >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?y COUNT(?v0) AS ?c WHERE { + ?y ?p1 ?v1 . + ?v0 rdf:type ?p0 . + ?y rdf:type ?p2 . + ?y ?p3 ?v3 . + FILTER(?c > 100000) . +} + +>> SLOTS: +p0: CLASS {inhabitants,inhabitant,habitant,dweller,denizen} +p1: PROPERTY {} +p2: CLASS {cities,city,metropolis} +p3: PROPERTY {} +***************************************************************** +Which/WDT museum/NN exhibits/VBZ The/DT Scream/NNP by/IN Munch/NNP +>> QUERY: + +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + +SELECT ?x WHERE { ?y ?p0 ?v1 . - ?v3 ?p3 ?y . - ?v3 rdf:type ?p2 . - ?v0 rdf:type ?p1 . - FILTER(?c > 500000) . + ?x rdf:type ?p2 . + ?x ?p1 ?y . 
} >> SLOTS: +y: RESOURCE {Scream} +v1: RESOURCE {Munch} p0: PROPERTY {} -p1: CLASS {employees,employee} -p2: CLASS {companies,company} -p3: PROPERTY {websites,website,site} +p1: PROPERTY {exhibits} +p2: CLASS {museum} +***************************************************************** +What/WP is/VBZ the/DT revenue/NN of/IN IBM/NNP >> QUERY: PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-s... [truncated message content] |
From: <lor...@us...> - 2011-06-14 04:11:29
|
Revision: 2870 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2870&view=rev Author: lorenz_b Date: 2011-06-14 04:11:22 +0000 (Tue, 14 Jun 2011) Log Message: ----------- Switched to new SOLR libs and index built on this version. Made NER case insensitive. Some small changes in Eval script. Set max query execution time to 10 s. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-06-13 21:32:50 UTC (rev 2869) +++ trunk/components-ext/pom.xml 2011-06-14 04:11:22 UTC (rev 2870) @@ -46,7 +46,7 @@ <dependency> <groupId>org.apache.solr</groupId> <artifactId>solr-core</artifactId> - <version>1.4.1</version> + <version>3.1.0</version> <type>jar</type> <scope>compile</scope> </dependency> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-13 21:32:50 UTC (rev 2869) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-14 04:11:22 UTC (rev 2870) @@ -8,6 +8,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -57,12 +58,13 @@ private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); private static final int TOP_K = 5; - 
private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-1.4.1"; + private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-3.1.0"; private static final int RECURSION_DEPTH = 2; private Ranking ranking = Ranking.SIMILARITY; private boolean useRemoteEndpointValidation = true; private boolean stopIfQueryResultNotEmpty = true; + private int maxQueriesPerTemplate = 25; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -83,13 +85,16 @@ private Map<String, List<String>> learnedSPARQLQueries; private Set<Template> templates; - private Collection<? extends Query> sparqlQueryCandidates; + private Collection<Query> sparqlQueryCandidates; + private Map<Template, Collection<? extends Query>> template2Queries; private Map<String, String> prefixMap; private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); + private int maxQueryExecutionTimeInSeconds = 10; + public SPARQLTemplateBasedLearner(){ resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); resource_index.setHitsPerPage(TOP_K); @@ -113,6 +118,8 @@ modelGenenerator = new ModelGenerator(endpoint, predicateFilters); templateGenerator = new Templator(); + + cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); } public void setEndpoint(SparqlEndpoint endpoint){ @@ -131,6 +138,14 @@ this.useRemoteEndpointValidation = useRemoteEndpointValidation; } + public int getMaxQueryExecutionTimeInSeconds() { + return maxQueryExecutionTimeInSeconds; + } + + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { + this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; + } + public void setRanking(Ranking ranking) { this.ranking = ranking; } @@ -154,8 +169,9 @@ logger.info(t); } - //generate SPARQL query candidates - sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); + 
//generate SPARQL query candidates, but select only a fixed number per template + template2Queries = getSPARQLQueryCandidates(templates, ranking); + sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); //test candidates if(useRemoteEndpointValidation){ //on remote endpoint @@ -183,7 +199,10 @@ //generate SPARQL query candidates logger.info("Generating SPARQL query candidates..."); mon.start(); - sparqlQueryCandidates = getSPARQLQueryCandidates(templates, ranking); + Map<Template, Collection<? extends Query>> template2Queries = getSPARQLQueryCandidates(templates, ranking); + sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); + + mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); @@ -208,6 +227,10 @@ return queries; } + public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ + return template2Queries; + } + private Model getWorkingModel(List<String> resources){ logger.info("Generating local model..."); mon.start(); @@ -223,7 +246,7 @@ return workingModel; } - private Collection<? extends Query> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ + private Map<Template,Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ switch(ranking){ case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); @@ -232,13 +255,15 @@ } } - private Set<Query> getSPARQLQueryCandidates(Set<Template> templates){ + private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); Set<Query> queries = new HashSet<Query>(); - + Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); for(Template template : templates){ + queries = new HashSet<Query>(); queries.add(template.getQuery()); + template2Queries.put(template, queries); for(Slot slot : template.getSlots()){ Set<Query> tmp = new HashSet<Query>(); String var = slot.getAnchor(); @@ -258,7 +283,7 @@ } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - return queries; + return template2Queries; } private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ @@ -290,15 +315,18 @@ return query2Score; } - private Set<RatedQuery> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ + private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); + Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); Query query; for(Template template : templates){ query = template.getQuery(); + ratedQueries = new TreeSet<RatedQuery>(); ratedQueries.add(new RatedQuery(query, 0)); + template2Queries.put(template, ratedQueries); for(Slot slot : template.getSlots()){ Set<RatedQuery> tmp = new HashSet<RatedQuery>(); String var = slot.getAnchor(); @@ -319,16 +347,19 @@ } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - return ratedQueries; + return template2Queries; } - private List<Query> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ + private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); List<Query> queries = new ArrayList<Query>(); + Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); List<String> uriCandidates; for(Template template : templates){ + queries = new ArrayList<Query>(); queries.add(template.getQuery()); + template2Queries.put(template, queries); for(Slot slot : template.getSlots()){ List<Query> tmp = new ArrayList<Query>(); String var = slot.getAnchor(); @@ -364,7 +395,7 @@ } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - return queries; + return template2Queries; } private Set<String> getCandidateURIs(Slot slot){ @@ -520,6 +551,22 @@ return uri2Score; } + private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? extends Query>> template2Queries){ + List<Query> queries = new ArrayList<Query>(); + for(Entry<Template, Collection<? extends Query>> entry : template2Queries.entrySet()){ + int max = Math.min(maxQueriesPerTemplate, entry.getValue().size()); + int i = 0; + for(Query q : entry.getValue()){ + queries.add(q); + i++; + if(i == max){ + break; + } + } + } + return queries; + } + private void validateAgainstRemoteEndpoint(Collection<? 
extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); for(Query query : queries){ @@ -609,7 +656,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who are the presidents of the United States?"; + String question = "Give me all school types."; // String question = "Give me all films starring Brad Pitt"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-13 21:32:50 UTC (rev 2869) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-14 04:11:22 UTC (rev 2870) @@ -15,7 +15,7 @@ static final String[] genericReplacements = { "\"", "", "'", "", "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; - static NER ner = new LingPipeNER(); + static NER ner = new LingPipeNER(false);//not case sensitive best solution? 
public Preprocessor() { } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-06-13 21:32:50 UTC (rev 2869) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-06-14 04:11:22 UTC (rev 2870) @@ -28,7 +28,7 @@ } public LingPipeNER(boolean caseSensitive) { - this(caseSensitive, true); + this(caseSensitive, false); } public LingPipeNER(boolean caseSensitive, boolean allMatches) { Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-13 21:32:50 UTC (rev 2869) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-14 04:11:22 UTC (rev 2870) @@ -1,15 +1,14 @@ package org.dllearner.algorithm.tbsl; -import java.io.BufferedWriter; import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.io.Writer; import java.net.URLDecoder; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; @@ -29,6 +28,7 @@ import org.apache.log4j.PatternLayout; import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; +import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.util.LatexWriter; import org.w3c.dom.DOMException; @@ -148,6 +148,9 @@ answer = endpoint.executeAsk(query); } else { answer = new HashSet<String>(); + 
if(!query.contains("LIMIT")){ + query = query + " LIMIT 200"; + } ResultSet rs = endpoint.executeSelect(query); String variable; if(rs.getResultVars().size() == 1){ @@ -182,7 +185,7 @@ public void run(){ - int topN2Print = 25; + int topN2Print = 10; int questionId; @@ -193,7 +196,7 @@ latex.beginDocument(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ - if(i++ == 1)break; +// if(i++ == 1)break; try { questionId = entry.getKey(); question = entry.getValue(); @@ -226,10 +229,8 @@ if(learnedQuery != null){ learnedAnswer = getAnswerForSPARQLQuery(learnedQuery, "y"); } - //get the generated SPARQL query candidates - List<String> queries = stbl.getGeneratedSPARQLQueries(); //get the used templates - Set<Template> templates = stbl.getTemplates(); + List<Template> templates = new ArrayList<Template>(stbl.getTemplates()); //start output //write templates subsection @@ -242,29 +243,33 @@ } latex.endEnumeration(); + //get the generated SPARQL query candidates + Map<Template, Collection<? extends Query>> template2Queries = stbl.getTemplates2SPARQLQueries(); + //write generated queries subsection - latex.beginSubsection("Top " + topN2Print + " generated queries (max. 
" + queries.size() + ")"); - logger.info("LEARNED QUERIES(#" + queries.size() + "):\n"); - int cnt = 1; - if(!queries.isEmpty()){ - latex.beginEnumeration(); + latex.beginSubsection("Top " + topN2Print + " generated queries per template"); + int k = 1; + List<Query> queries; + for(Template t : templates){ + latex.beginSubSubsection("Template " + k); + queries = new ArrayList<Query>(template2Queries.get(t)); + if(!queries.isEmpty()){ + latex.beginEnumeration(); + } + //print top n queries to latex file + int max = Math.min(topN2Print, queries.size()); + for(int j = 0; j < max; j++){ + latex.beginEnumerationItem(); + latex.addListing(queries.get(j).toString()); + latex.endEnumerationItem(); + } + if(!queries.isEmpty()){ + latex.endEnumeration(); + } + k++; } - //print queries to log file - for(String q : queries){ - logger.info("QUERY " + cnt++ + ":\n" + q + "\n"); - logger.info("--------"); - } - //print top n queries to latex file - int max = Math.min(topN2Print, queries.size()); - for(int j = 0; j < max; j++){ - latex.beginEnumerationItem(); - latex.addListing(queries.get(j)); - latex.endEnumerationItem(); - } - if(!queries.isEmpty()){ - latex.endEnumeration(); - } + //write solution subsection if exists if(learnedQuery != null){ latex.beginSubsection("Solution"); @@ -299,7 +304,7 @@ latex.beginDocument(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ - if(i++ == 1)break; +// if(i++ == 1)break; try { questionId = entry.getKey(); question = entry.getValue(); @@ -340,7 +345,7 @@ latex.endEnumeration(); //write generated queries subsection - latex.beginSubsection("Top " + topN2Print + " generated queries (max. 
" + queries.size() + ")"); + latex.beginSubsection("Top " + topN2Print + " generated queries per template"); logger.info("LEARNED QUERIES(#" + queries.size() + "):\n"); int cnt = 1; if(!queries.isEmpty()){ @@ -385,13 +390,14 @@ if(target.contains(s)){ s = "\\textcolor{green}{" + s + "}"; } - sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); +// sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); + sb.append(s.replace("_", "\\_").replace("&", "\\&").replace("%", "\\%").replace("#", "\\#").replace("http://dbpedia.org/resource/", "")).append(", "); if(i % 2 == 0){ sb.append("\n"); } i++; } - } catch (UnsupportedEncodingException e) { + } catch (Exception e) { e.printStackTrace(); } return sb.toString(); @@ -407,13 +413,14 @@ try { int i = 1; for(String s : (Collection<String>)learnedAnswer){ - sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); +// sb.append(URLDecoder.decode(s, "UTF-8").replace("_", "\\_").replace("http://dbpedia.org/resource/", "")).append(", "); + sb.append(s.replace("_", "\\_").replace("&", "\\&").replace("%", "\\%").replace("#", "\\#").replace("http://dbpedia.org/resource/", "")).append(", "); if(i % 2 == 0){ sb.append("\n"); } i++; } - } catch (UnsupportedEncodingException e) { + } catch (Exception e) { e.printStackTrace(); } return sb.toString(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-06-15 14:35:43
|
Revision: 2882 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2882&view=rev Author: lorenz_b Date: 2011-06-15 14:35:36 +0000 (Wed, 15 Jun 2011) Log Message: ----------- Added subsection for entity coverage to log output. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Added Paths: ----------- trunk/components-ext/run_eval_testset trunk/components-ext/run_eval_trainset Added: trunk/components-ext/run_eval_testset =================================================================== --- trunk/components-ext/run_eval_testset (rev 0) +++ trunk/components-ext/run_eval_testset 2011-06-15 14:35:36 UTC (rev 2882) @@ -0,0 +1 @@ +mvn -e exec:java -Dexec.mainClass="org.dllearner.algorithm.tbsl.Evaluation" -Dexec.classpathScope="test" -Dexec.args="tbsl/evaluation/dbpedia-test-questions.xml" Property changes on: trunk/components-ext/run_eval_testset ___________________________________________________________________ Added: svn:executable + * Added: trunk/components-ext/run_eval_trainset =================================================================== --- trunk/components-ext/run_eval_trainset (rev 0) +++ trunk/components-ext/run_eval_trainset 2011-06-15 14:35:36 UTC (rev 2882) @@ -0,0 +1 @@ +mvn -e exec:java -Dexec.mainClass="org.dllearner.algorithm.tbsl.Evaluation" -Dexec.classpathScope="test" -Dexec.args="tbsl/evaluation/dbpedia-train.xml" Property changes on: trunk/components-ext/run_eval_trainset ___________________________________________________________________ Added: svn:executable + * Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-15 14:10:50 UTC (rev 2881) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-15 14:35:36 UTC (rev 2882) @@ -86,6 +86,7 @@ private Set<Template> templates; private Collection<Query> sparqlQueryCandidates; private Map<Template, Collection<? extends Query>> template2Queries; + private Map<Slot, List<String>> slot2URI; private Map<String, String> prefixMap; @@ -113,6 +114,7 @@ prefixMap.put("http://dbpedia.org/property/", "dbp"); prefixMap.put("http://dbpedia.org/resource/", "dbr"); prefixMap.put(FOAF.getURI(), "foaf"); + prefixMap.put("http://dbpedia.org/class/yago/", "yago"); modelGenenerator = new ModelGenerator(endpoint, predicateFilters); @@ -157,11 +159,17 @@ this.ranking = ranking; } - public void learnSPARQLQueries() throws NoTemplateFoundException{ + private void reset(){ learnedSPARQLQueries = new HashMap<String, List<String>>(); resourcesURICache = new HashMap<String, List<String>>(); classesURICache = new HashMap<String, List<String>>(); propertiesURICache = new HashMap<String, List<String>>(); + template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); + slot2URI = new HashMap<Slot, List<String>>(); + } + + public void learnSPARQLQueries() throws NoTemplateFoundException{ + reset(); //generate SPARQL query templates logger.info("Generating SPARQL query templates..."); mon.start(); @@ -238,6 +246,10 @@ return template2Queries; } + public Map<Slot, List<String>> getSlot2URIs(){ + return slot2URI; + } + private Model getWorkingModel(List<String> resources){ logger.info("Generating local model..."); mon.start(); @@ -439,7 +451,7 @@ SortedSet<String> tmp; List<String> uris; - //prune the word list with only when slot type is not RESOURCE + //prune the word list only when slot type is not RESOURCE List<String> words; if(slot.getSlotType() == SlotType.RESOURCE){ words = slot.getWords(); @@ -458,6 +470,7 @@ sortedURIs.addAll(tmp); tmp.clear(); } + slot2URI.put(slot, sortedURIs); mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("URIs: " + sortedURIs); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java 2011-06-15 14:10:50 UTC (rev 2881) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Triple.java 2011-06-15 14:35:36 UTC (rev 2882) @@ -31,6 +31,17 @@ this.value = value; } + public void reverse(){ + SPARQL_Term newVariable = new SPARQL_Term(this.value.getName()); + newVariable.setIsVariable(this.value.isVariable()); + this.variable = newVariable; + + SPARQL_Value newValue = new SPARQL_Value(this.variable.getName()); + newValue.setIsVariable(this.variable.isVariable()); + this.value = newValue; + + } + @Override public String toString() { if (optional) { Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-15 14:10:50 UTC (rev 2881) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-15 14:35:36 UTC (rev 2882) @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -17,6 +18,8 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -33,6 +36,7 @@ import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.util.LatexWriter; import org.w3c.dom.DOMException; @@ -41,9 +45,15 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import scala.actors.threadpool.Arrays; + import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.sparql.sse.builders.BuilderExpr.Build; +import com.hp.hpl.jena.sparql.vocabulary.FOAF; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; public class Evaluation{ @@ -59,6 +69,7 @@ private SPARQLTemplateBasedLearner stbl; private int testID = -1; + private Map<String, String> prefixMap; public Evaluation(File ... 
evaluationFiles) throws FileNotFoundException, IOException{ for(File file : evaluationFiles){ @@ -67,6 +78,15 @@ stbl = new SPARQLTemplateBasedLearner(); init(); + + prefixMap = new HashMap<String, String>(); + prefixMap.put("rdf", RDF.getURI()); + prefixMap.put("rdfs", RDFS.getURI()); + prefixMap.put("onto", "http://dbpedia.org/ontology/"); + prefixMap.put("prop", "http://dbpedia.org/property/"); + prefixMap.put("res", "http://dbpedia.org/resource/"); + prefixMap.put("foaf", FOAF.getURI()); + prefixMap.put("yago", "http://dbpedia.org/class/yago/"); } public void init() throws FileNotFoundException, IOException{ @@ -319,6 +339,53 @@ k++; } + //get the URIs for each template slot + latex.beginSubsection("Covered entities"); + Map<Slot, List<String>> slot2URIsMap = stbl.getSlot2URIs(); +// Map<List<String>, List<String>> tokens2URIs = new HashMap<List<String>, List<String>>(); +// for(Entry<Slot, List<String>> slot2URI : slot2URIsMap.entrySet()){ +// tokens2URIs.put(slot2URI.getKey().getWords(), value) +// if(slot2URI.getValue().contains(getFullEntity(entity))){ +// covered = true; +// break; +// } +// } + List<String> targetEntities = extractEntities(targetQuery); + Map<String, Boolean> coveredEntitiesMap = new HashMap<String, Boolean>(); + for(String entity : targetEntities){ + boolean covered = false; + for(Entry<Slot, List<String>> slot2URI : slot2URIsMap.entrySet()){ + if(slot2URI.getValue().contains(getFullURI(entity))){ + covered = true; + break; + } + } + + coveredEntitiesMap.put(entity, covered); + } + latex.beginSubSubsection("Target entities"); + StringBuilder sb = new StringBuilder(); + sb.append("\\begin{tabular}{| l | c |}\\hline\n"); + for(Entry<String, Boolean> e : coveredEntitiesMap.entrySet()){ + sb.append(escapeString(e.getKey())).append(" & ").append(e.getValue()).append("\\\\\\hline\n"); + } + sb.append("\\end{tabular}\n"); + latex.addText(sb.toString()); + latex.beginSubSubsection("Keyword -> URIs"); + sb = new StringBuilder(); + 
sb.append("\\begin{tabular}{| l | l |}\\hline\n"); + for(Entry<Slot, List<String>> slot2URI : slot2URIsMap.entrySet()){ + if(!slot2URI.getKey().getWords().isEmpty()){ + StringBuilder uris = new StringBuilder(); + for(String uri : slot2URI.getValue()){ + uris.append(escapeString(getPrefixedURI(uri))).append(", "); + } + sb.append(slot2URI.getKey().getWords() + "[" + slot2URI.getKey().getSlotType() + "]").append(" & ").append(uris.toString()).append("\\\\\\hline\n"); + } + } + sb.append("\\end{tabular}\n"); + latex.addText(sb.toString()); + //write solution subsection if exists if(learnedQuery != null){ latex.beginSubsection("Solution"); @@ -344,6 +411,73 @@ latex.write("log/evaluation.tex"); } + public static List<String> extractEntities(String query){ + List<String> exclusions = Arrays.asList(new String[]{"rdf", "rdfs"}); + List<String> entities = new ArrayList<String>(); + //pattern to detect resources + Pattern pattern = Pattern.compile("(\\w+):(\\w+)"); + Matcher matcher = pattern.matcher(query); + String group; + while(matcher.find()){ + group = matcher.group(); + boolean add = true; + for(String ex : exclusions){ + if(group.contains(ex)){ + add = false; + break; + } + } + if(add){ + entities.add(group); + } + } + //pattern to detect string literals + pattern = Pattern.compile("'(\\w+)'@en"); + matcher = pattern.matcher(query); + while(matcher.find()){ + group = matcher.group(); + entities.add(buildEntityFromLabel(group)); + } + + return entities; + } + + private static String buildEntityFromLabel(String label){ + String base = "res:"; + String entity = label.substring(1).substring(0, label.lastIndexOf("'")-1).replace(" ", "_"); + return base + entity; + } + + private String getFullURI(String prefixedURI){ + String fullURI = prefixedURI; + String prefix; + String uri; + for(Entry<String, String> prefix2URI : prefixMap.entrySet()){ + prefix = prefix2URI.getKey(); + uri = prefix2URI.getValue(); + if(prefixedURI.startsWith(prefix)){ + fullURI = 
prefixedURI.replace(prefix + ":", uri); + break; + } + } + return fullURI; + } + + private String getPrefixedURI(String fullURI){ + String prefixedURI = fullURI; + String prefix; + String uri; + for(Entry<String, String> prefix2URI : prefixMap.entrySet()){ + prefix = prefix2URI.getKey(); + uri = prefix2URI.getValue(); + if(fullURI.startsWith(uri)){ + prefixedURI = fullURI.replace(uri, prefix + ":" ); + break; + } + } + return prefixedURI; + } + private double computeRecall(Object targetAnswer, Object learnedAnswer){ if(learnedAnswer == null){ return -1; @@ -503,6 +637,10 @@ } + private String escapeString(String str){ + return str.replace("_", "\\_").replace("&", "\\&").replace("%", "\\%").replace("#", "\\#"); + } + private String escapeAnswerString(Object learnedAnswer){ if(learnedAnswer instanceof Collection<?>){ StringBuilder sb = new StringBuilder(); @@ -546,6 +684,15 @@ File file = new File(Evaluation.class.getClassLoader().getResource(args[0]).getPath()); + System.out.println(Evaluation.extractEntities("SELECT DISTINCT ?uri ?string WHERE {" + + "?uri rdf:type onto:Person ." + + "?uri onto:birthPlace ?city ." + + "?city rdfs:label 'Heraklion'@en" + + "OPTIONAL {?uri rdfs:label ?string . " + + "FILTER (lang(?string) = 'en') }" + + "}}") + ); + Evaluation eval = new Evaluation(file); eval.run(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-07-21 11:36:59
|
Revision: 2954 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2954&view=rev Author: lorenz_b Date: 2011-07-21 11:36:47 +0000 (Thu, 21 Jul 2011) Log Message: ----------- Added configuration options via properties file for tbsl. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java trunk/components-ext/src/main/resources/tbsl/tbsl.properties trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/pom.xml 2011-07-21 11:36:47 UTC (rev 2954) @@ -132,6 +132,11 @@ <version>1.0.14</version> </dependency> + <dependency> + <groupId>org.ini4j</groupId> + <artifactId>ini4j</artifactId> + <version>0.5.2</version> + </dependency> </dependencies> <build> <plugins> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,262 @@ +package org.dllearner.algorithm.qtl.filters; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.SortedSet; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; + + +import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; +import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler; +import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein; +import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; + +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.util.iterator.Filter; + +public class QuestionBasedStatementFilter2 extends Filter<Statement> { + + private Set<String> questionWords; + + private AbstractStringMetric qGramMetric; + private AbstractStringMetric levensteinMetric; + private AbstractStringMetric jaroWinklerMetric; + private I_Sub substringMetric; + + private double threshold = 0.4; + + private int topK = 3; + private double topKSumThreshold = 0.8; + + private Map<Statement, Double> statement2Similarity = new HashMap<Statement, Double>(); + + private Map<RDFNode, Boolean> cache = new HashMap<RDFNode, Boolean>(); + + private Map<Statement, String> statement2TokenMap = new HashMap<Statement, String>(); + + private Map<String, String> resource2TokenMap = new HashMap<String, String>(); + + int cnt = 0; + + public QuestionBasedStatementFilter2(Set<String> questionWords){ + this.questionWords = questionWords; + qGramMetric = new QGramsDistance(); + levensteinMetric = new Levenshtein(); + jaroWinklerMetric = new JaroWinkler(); + substringMetric = new I_Sub(); + + } + + private boolean 
isSimiliar2QuestionWord(String s, Statement st){ + for(String word : questionWords){ + if(areSimiliar(word, s, st)){ + statement2TokenMap.put(st, word); + resource2TokenMap.put(s, word); + return true; + } + } + return isSimilarWithSubstringMetrik(s, st); + } + + private boolean areSimiliar(String s1, String s2, Statement st){ + return (qGramMetric.getSimilarity(s1, s2) >= threshold) || + (levensteinMetric.getSimilarity(s1, s2) >= threshold); + } + + private boolean isSimilarWithSubstringMetrik(String s, Statement st){ + SortedSet<Double> values = new TreeSet<Double>(Collections.reverseOrder()); + for(String word : questionWords){ + double v = substringMetric.score(word, s, true); + if(v >= threshold){statement2TokenMap.put(st, word);resource2TokenMap.put(s, word); + return true; + } else { + values.add(Double.valueOf(v)); + } + } + double sum = 0; + for(Double v : getTopK(values)){ + if(v >= 0){ + sum += v; + } + + } + if(sum >= topKSumThreshold){ + statement2TokenMap.put(st, "ALL"); + } + return sum >= topKSumThreshold; + } + + private Set<Double> getTopK(SortedSet<Double> values){ + Set<Double> top = new HashSet<Double>(); + int k = 0; + for(Double v : values){ + if(k == topK){ + break; + } + top.add(v); + k++; + } + return top; + } + + + private String getFragment(String uri){ + int i = uri.lastIndexOf("#"); + if(i > 0){ + return uri.substring(i+1); + } else { + return uri.substring(uri.lastIndexOf("/")+1); + } + } + + @Override + public boolean accept(Statement s) { + Boolean similarPredicate = cache.get(s.getPredicate()); + Boolean similarObject = cache.get(s.getObject()); + if(similarPredicate != null && similarObject != null){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + String token = resource2TokenMap.get(object); + if(token != null){ + statement2TokenMap.put(s, token); + } else { 
+ token = resource2TokenMap.get(getFragment(s.getPredicate().getURI())); + if( token != null){ + statement2TokenMap.put(s, token); + } + } + + + return similarPredicate || similarObject; + } else if(similarPredicate == null && similarObject != null){ + if(similarObject){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + String token = resource2TokenMap.get(object); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + String predicate = getFragment(s.getPredicate().getURI()); + if (isSimiliar2QuestionWord(predicate, s)){ + cache.put(s.getPredicate(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getPredicate(), Boolean.valueOf(false)); + return false; + } + } + } else if(similarPredicate != null && similarObject == null){ + if(similarPredicate){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + String token = resource2TokenMap.get(object); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + cache.put(s.getObject(), Boolean.valueOf(false)); + } + + String token = resource2TokenMap.get(getFragment(s.getPredicate().getURI())); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + return true; + } else { 
+ cache.put(s.getObject(), Boolean.valueOf(false)); + return false; + } + } + } else { + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getObject(), Boolean.valueOf(false)); + } + + String predicate = getFragment(s.getPredicate().getURI()); + if (isSimiliar2QuestionWord(predicate, s)){ + cache.put(s.getPredicate(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getPredicate(), Boolean.valueOf(false)); + } + return false; + } + } + +// @Override +// public boolean accept(Statement s) { +// String predicate = s.getPredicate().getURI().substring(s.getPredicate().getURI().lastIndexOf("/")); +// String object = null; +// if(s.getObject().isURIResource()){ +// object = s.getObject().asResource().getURI(); +// object = getFragment(s.getObject().asResource().getURI()); +// } else if(s.getObject().isLiteral()){ +// object = s.getObject().asLiteral().getLexicalForm(); +// } +// return isSimiliar2QuestionWord(predicate, s) || isSimiliar2QuestionWord(object, s); +// } + + public void setThreshold(double threshold){ + this.threshold = threshold; + } + + public double getThreshold(){ + return threshold; + } + + public Set<Statement> getStatementsBelowThreshold(double threshold){ + Set<Statement> statements = new HashSet<Statement>(); + for(Entry<Statement, Double> entry : statement2Similarity.entrySet()){ + if(entry.getValue().doubleValue() < threshold){ + statements.add(entry.getKey()); + } + } + return statements; + } + + public Map<Statement, String> getStatement2TokenMap() { + return statement2TokenMap; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,320 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package org.dllearner.algorithm.qtl.impl; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.dllearner.algorithm.qtl.QueryTreeFactory; +import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import org.dllearner.algorithm.qtl.filters.Filter; +import org.dllearner.algorithm.qtl.filters.Filters; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter2; +import org.dllearner.algorithm.qtl.filters.ZeroFilter; + +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Selector; +import com.hp.hpl.jena.rdf.model.SimpleSelector; +import com.hp.hpl.jena.rdf.model.Statement; + +/** + * + * @author Lorenz Bühmann + * + */ +public class QueryTreeFactoryImpl2 implements QueryTreeFactory<String> { + + private int nodeId; + private Comparator<Statement> comparator; + private Set<String> predicateFilters; + + private Filter predicateFilter = new ZeroFilter(); + private Filter objectFilter = new ZeroFilter(); + private Selector statementSelector = new SimpleSelector(); + private com.hp.hpl.jena.util.iterator.Filter<Statement> keepFilter; + + public QueryTreeFactoryImpl2(){ + comparator = new StatementComparator(); + predicateFilters = new HashSet<String>(Filters.getAllFilterProperties()); + } + + public void setPredicateFilter(Filter filter){ + this.predicateFilter = filter; + } + + public void setObjectFilter(Filter filter){ + this.objectFilter = filter; + } + + @Override + public void setStatementSelector(Selector selector) { + this.statementSelector = selector; + + } + + @Override + public 
void setStatementFilter(com.hp.hpl.jena.util.iterator.Filter<Statement> statementFilter) { + this.keepFilter = statementFilter; + + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example, Model model) { + if(keepFilter == null){ + return createTree(model.getResource(example), model); + } else { + return createTreeOptimized(model.getResource(example), model); + } + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example, Model model, int maxEdges) { + if(keepFilter == null){ + return createTree(model.getResource(example), model); + } else { + return createTreeOptimized(model.getResource(example), model, maxEdges); + } + } + + @Override + public QueryTreeImpl<String> getQueryTree(Resource example, Model model) { + return createTree(example, model); + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example) { + return new QueryTreeImpl<String>(example); + } + + private QueryTreeImpl<String> createTreeOptimized(Resource s, Model model, int maxEdges){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + fillMap(s, model, resource2Statements, null); + + QuestionBasedStatementFilter filter = (QuestionBasedStatementFilter)keepFilter; + Set<Statement> statements; + int diff = valueCount(resource2Statements) - maxEdges; + main:while(diff > 0){ + double oldThreshold = filter.getThreshold(); + statements = filter.getStatementsBelowThreshold(oldThreshold+0.1); + for(SortedSet<Statement> set : resource2Statements.values()){ + for(Statement st : statements){ + if(set.remove(st)){ + diff--; + if(diff == 0){ + break main; + } + } + } + } + } + + + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private int valueCount(SortedMap<String, SortedSet<Statement>> map){ + int cnt = 0; + for(SortedSet<Statement> statements : map.values()){ + cnt += 
statements.size(); + } + return cnt; + } + + private QueryTreeImpl<String> createTreeOptimized(Resource s, Model model){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + fillMap(s, model, resource2Statements, null); + + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private void fillMap(Resource s, Model model, SortedMap<String, SortedSet<Statement>> resource2Statements, String oldSimilarToken){ + Iterator<Statement> it = model.listStatements(s, null, (RDFNode)null).filterKeep(keepFilter); + Statement st; + SortedSet<Statement> statements; + while(it.hasNext()){ + st = it.next(); + String newSimilarToken = ((QuestionBasedStatementFilter2)keepFilter).getStatement2TokenMap().get(st); + System.out.println(st); + System.out.println(newSimilarToken); + if(!newSimilarToken.equals(oldSimilarToken) || newSimilarToken.equals("ALL")){ + statements = resource2Statements.get(st.getSubject().toString()); + if(statements == null){ + statements = new TreeSet<Statement>(comparator); + resource2Statements.put(st.getSubject().toString(), statements); + } + statements.add(st); + if(st.getObject().isURIResource() && !resource2Statements.containsKey(st.getObject().asResource().getURI())){ + fillMap(st.getObject().asResource(), model, resource2Statements, newSimilarToken); + } + } + + } + } + + private QueryTreeImpl<String> createTree(Resource s, Model model){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + Statement st; + SortedSet<Statement> statements; + Iterator<Statement> it = model.listStatements(statementSelector); + while(it.hasNext()){ + st = it.next(); + statements = resource2Statements.get(st.getSubject().toString()); + if(statements == null){ + statements = new TreeSet<Statement>(comparator); + 
resource2Statements.put(st.getSubject().toString(), statements); + } + statements.add(st); + } + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements){ + tree.setId(nodeId++); + if(resource2Statements.containsKey(tree.getUserObject())){ + QueryTreeImpl<String> subTree; + Property predicate; + RDFNode object; + for(Statement st : resource2Statements.get(tree.getUserObject())){ + predicate = st.getPredicate(); + object = st.getObject(); + if(!predicateFilter.isRelevantResource(predicate.getURI())){ + continue; + } + if(predicateFilters.contains(st.getPredicate().toString())){ + continue; + } + if(object.isLiteral()){ + Literal lit = st.getLiteral(); + String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); + StringBuilder sb = new StringBuilder(); + sb.append("\"").append(escapedLit).append("\""); + if(lit.getDatatypeURI() != null){ + sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + } + if(!lit.getLanguage().isEmpty()){ + sb.append("@").append(lit.getLanguage()); + } + subTree = new QueryTreeImpl<String>(sb.toString()); +// subTree = new QueryTreeImpl<String>(lit.toString()); + subTree.setId(nodeId++); + subTree.setLiteralNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ + if(tree.getUserObjectPathToRoot().size() < 3 && + !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + fillTree(subTree, resource2Statements); + } + } + } + } + } + + class StatementComparator implements Comparator<Statement>{ + + @Override + public int compare(Statement s1, Statement s2) { +// 
if(s1.getPredicate() == null && s2.getPredicate() == null){ +// return 0; +// } +// return s1.getPredicate().toString().compareTo(s2.getPredicate().toString()) +// + s1.getObject().toString().compareTo(s2.getObject().toString()); + if(s1.getPredicate() == null && s2.getPredicate() == null){ + return 0; + } + + if(s1.getPredicate().toString().compareTo(s2.getPredicate().toString()) == 0){ + return s1.getObject().toString().compareTo(s2.getObject().toString()); + } else { + return s1.getPredicate().toString().compareTo(s2.getPredicate().toString()); + } + + } + + + + } + + public static String encode(String s) { + char [] htmlChars = s.toCharArray(); + StringBuffer encodedHtml = new StringBuffer(); + for (int i=0; i<htmlChars.length; i++) { + switch(htmlChars[i]) { + case '<': + encodedHtml.append("&lt;"); + break; + case '>': + encodedHtml.append("&gt;"); + break; + case '&': + encodedHtml.append("&amp;"); + break; + case '\'': + encodedHtml.append("&#39;"); + break; + case '"': + encodedHtml.append("&quot;"); + break; + case '\\': + encodedHtml.append("&#92;"); + break; + case (char)133: + encodedHtml.append("&hellip;"); + break; + default: + encodedHtml.append(htmlChars[i]); + break; + } + } + return encodedHtml.toString(); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,40 @@ +package org.dllearner.algorithm.qtl.operations; + +import java.util.ArrayList; +import java.util.List; + +import 
com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.algebra.Op; +import com.hp.hpl.jena.sparql.algebra.OpVisitorBase; +import com.hp.hpl.jena.sparql.algebra.op.OpBGP; +import com.hp.hpl.jena.sparql.algebra.op.OpFilter; +import com.hp.hpl.jena.sparql.algebra.op.OpProject; +import com.hp.hpl.jena.sparql.algebra.op.OpTriple; + +public class FilterVisitor extends OpVisitorBase { + + private List<Op> ops = new ArrayList<Op>(); + + + + @Override + public void visit(OpProject opProject) { + opProject.getSubOp().visit(this) ; + } + + @Override + public void visit(OpBGP opBGP) { + for (Triple t : opBGP.getPattern()){ + if(t.getObject().isURI()){ + System.out.println(t.getObject().toString()); + } + } + } + + @Override + public void visit(OpFilter opFilter) { + // TODO Auto-generated method stub + super.visit(opFilter); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -1,5 +1,9 @@ package org.dllearner.algorithm.tbsl.learning; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -28,12 +32,15 @@ import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; +import 
org.dllearner.algorithm.tbsl.util.Prefixes; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; @@ -53,17 +60,18 @@ LUCENE, SIMILARITY, NONE } + private static final String OPTIONS_FILE = "tbsl/tbsl.properties"; + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); - private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); + private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - private static final int TOP_K = 5; - private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-3.1.0"; private static final int RECURSION_DEPTH = 2; - private Ranking ranking = Ranking.SIMILARITY; - private boolean useRemoteEndpointValidation = true; - private boolean stopIfQueryResultNotEmpty = true; - private int maxTestedQueriesPerTemplate = 25; + private Ranking ranking; + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate; + private int maxQueryExecutionTimeInSeconds; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -92,35 +100,59 @@ private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); - private int maxQueryExecutionTimeInSeconds = 20; - - public SPARQLTemplateBasedLearner(){ - resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); - resource_index.setHitsPerPage(TOP_K); - class_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_classes"); - class_index.setHitsPerPage(TOP_K); - property_index = 
new SolrSearch(SOLR_SERVER_URL + "/dbpedia_properties"); - property_index.setHitsPerPage(TOP_K); + try { + init(new Options(this.getClass().getClassLoader().getResourceAsStream(OPTIONS_FILE))); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } Set<String> predicateFilters = new HashSet<String>(); predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - prefixMap = new HashMap<String, String>(); - prefixMap.put(RDF.getURI(), "rdf"); - prefixMap.put(RDFS.getURI(), "rdfs"); - prefixMap.put("http://dbpedia.org/ontology/", "dbo"); - prefixMap.put("http://dbpedia.org/property/", "dbp"); - prefixMap.put("http://dbpedia.org/resource/", "dbr"); - prefixMap.put(FOAF.getURI(), "foaf"); - prefixMap.put("http://dbpedia.org/class/yago/", "yago"); + prefixMap = Prefixes.getPrefixes(); modelGenenerator = new ModelGenerator(endpoint, predicateFilters); templateGenerator = new Templator(); + } + + public SPARQLTemplateBasedLearner(String optionsFile){ + try { + init(new Options(new FileReader(new File(optionsFile)))); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public SPARQLTemplateBasedLearner(Options options){ + init(options); + } + + private void init(Options options){ + String resourcesIndexUrl = options.fetch("solr.resources.url"); + resource_index = new SolrSearch(resourcesIndexUrl); + String classesIndexUrl = options.fetch("solr.classes.url"); + class_index = new SolrSearch(classesIndexUrl); + + String propertiesIndexUrl = options.fetch("solr.properties.url"); + property_index = new SolrSearch(propertiesIndexUrl); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); 
cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); } public void setEndpoint(SparqlEndpoint endpoint){ @@ -475,7 +507,8 @@ tmp = new TreeSet<String>(new StringSimilarityComparator(word)); uris = uriCache.get(word); if(uris == null){ - uris = index.getResources("label:\"" + word + "\"~0.7"); +// uris = index.getResources("label:\"" + word + "\"~0.7"); + uris = index.getResources("label:" + word + "~0.5"); uriCache.put(word, uris); } tmp.addAll(uris); @@ -711,7 +744,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who developed the video game World of Warcraft?"; + String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -46,6 +46,7 @@ params.set("rows", 
hitsPerPage); params.set("start", offset); response = server.query(params); + SolrDocumentList docList = response.getResults(); lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.HashMap; +import java.util.Map; + +import com.hp.hpl.jena.sparql.vocabulary.FOAF; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; + + +public class Prefixes { + + private static Map<String, String> prefixes = new HashMap<String, String>(); + + static { + prefixes.put(RDF.getURI(), "rdf"); + prefixes.put(RDFS.getURI(), "rdfs"); + prefixes.put("http://dbpedia.org/ontology/", "dbo"); + prefixes.put("http://dbpedia.org/property/", "dbp"); + prefixes.put("http://dbpedia.org/resource/", "dbr"); + prefixes.put(FOAF.getURI(), "foaf"); + prefixes.put("http://dbpedia.org/class/yago/", "yago"); + } + + public static Map<String, String> getPrefixes(){ + return prefixes; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties 2011-07-21 11:36:47 UTC (rev 2954) @@ -4,7 +4,7 @@ # validate against 
remote endpoint if 'true', otherwise load a model and validate against it useRemoteEndpointValidation=true # number of tested SPARQL queries per template -maxTestedQueriesPerTemplate=25 +maxTestedQueriesPerTemplate=150 # max execution time for a SPARQL query before canceled -maxQueryExecutionTimeInSeconds=20 +maxQueryExecutionTimeInSeconds=40 Added: trunk/components-ext/src/main/resources/tbsl/tbsl.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/tbsl.properties (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/tbsl.properties 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,16 @@ +solr.server.url = http://139.18.2.173:8080/apache-solr-3.3.0 +solr.classes.url = ${solr.server.url}/dbpedia_classes +solr.resources.url = ${solr.server.url}/dbpedia_resources +solr.properties.url = ${solr.server.url}/dbpedia_properties +solr.query.limit = 20 + +sparql.endpoint.url = http://live.dbpedia.org/sparql +sparql.endpoint.defaultGraph = http://dbpedia.org +sparql.query.maxExecutionTimeInSeconds = 20 + +!remote | local +learning.validationType = remote +learning.stopAfterFirstNonEmptyQueryResult = true +learning.maxTestedQueriesPerTemplate = 20 +!similarity | lucene | none +learning.ranking = similarity Property changes on: trunk/components-ext/src/main/resources/tbsl/tbsl.properties ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -1,7 +1,13 @@ package org.dllearner.algorithm.qtl; +import java.util.Arrays; +import java.util.HashSet; + 
import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter2; import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl2; import org.dllearner.algorithm.qtl.operations.Generalisation; import org.junit.Test; @@ -22,7 +28,7 @@ @Test public void generalisationTest1(){ - String resource = "http://dbpedia.org/resource/Leipzig"; + String resource = "http://dbpedia.org/resource/Chelsea_F.C."; Generalisation<String> gen = new Generalisation<String>(); Model model = getModelForExample(resource, maxModelSizePerExample); @@ -31,8 +37,35 @@ QueryTree<String> genTree = gen.generalise(tree); String query = genTree.toSPARQLQueryString(); System.out.println(query); + System.out.println(tree.toQuery()); } + @Test + public void generalisationTest2(){ +// String resource = "http://dbpedia.org/resource/Interview_with_the_Vampire:_The_Vampire_Chronicles"; + String resource = "http://dbpedia.org/resource/Arsenal_F.C."; + + Generalisation<String> gen = new Generalisation<String>(); + Model model = getModelForExample(resource, maxModelSizePerExample); + QueryTreeFactory<String> treeFactory = new QueryTreeFactoryImpl2(); + QuestionBasedStatementFilter2 filter = new QuestionBasedStatementFilter2(new HashSet( +// Arrays.asList(new String[]{"film", "starring", "Brad Pitt"}))); + Arrays.asList(new String[]{"soccer club", "Premier League", "manager", "France"}))); + filter.setThreshold(0.6); + treeFactory.setStatementFilter(filter); + QueryTree<String> tree = treeFactory.getQueryTree(resource, model); + System.out.println(tree.getStringRepresentation()); + + QueryTreeFactory<String> treeFactory2 = new QueryTreeFactoryImpl(); + QuestionBasedStatementFilter filter2 = new QuestionBasedStatementFilter(new HashSet( +// Arrays.asList(new String[]{"film", "starring", "Brad Pitt"}))); + 
Arrays.asList(new String[]{"soccer club", "Premier League", "manager", "France"}))); + filter2.setThreshold(0.6); + treeFactory2.setStatementFilter(filter2); + QueryTree<String> tree2 = treeFactory2.getQueryTree(resource, model); + System.out.println(tree2.getStringRepresentation()); + } + private Model getModelForExample(String example, int maxSize){ Query query = makeConstructQuery(example, LIMIT, 0); QueryExecution qexec = QueryExecutionFactory.sparqlService(ENDPOINT_URL, query); @@ -63,15 +96,15 @@ sb.append("WHERE {\n"); sb.append("<").append(example).append("> ").append("?p0 ").append("?o0").append(".\n"); for(int i = 1; i < RECURSION_DEPTH; i++){ - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + sb.append("OPTIONAL{?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".}\n"); } sb.append("FILTER (!regex (?p0, \"http://dbpedia.org/property/wikiPage\") && !regex(?p1, \"http://dbpedia.org/property/wikiPage\"))"); sb.append("}\n"); - sb.append("ORDER BY "); - for(int i = 0; i < RECURSION_DEPTH; i++){ - sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); - } +// sb.append("ORDER BY "); +// for(int i = 0; i < RECURSION_DEPTH; i++){ +// sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); +// } sb.append("\n"); sb.append("LIMIT ").append(limit).append("\n"); sb.append("OFFSET ").append(offset); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -73,8 +73,10 @@ tree.dump(); System.out.println("-----------------------------"); cnt++; + 
System.out.println(((QueryTreeImpl<String>)tree).toQuery()); } + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,57 @@ +package org.dllearner.algorithm.qtl; + +import java.util.HashSet; +import java.util.List; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.qtl.exception.EmptyLGGException; +import org.dllearner.algorithm.qtl.exception.NegativeTreeCoverageExecption; +import org.dllearner.algorithm.qtl.exception.TimeOutException; +import org.dllearner.algorithm.qtl.filters.QuestionBasedQueryTreeFilterAggressive; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; +import org.dllearner.algorithm.qtl.operations.NBR; +import org.dllearner.algorithm.qtl.operations.PostLGG; +import org.dllearner.algorithm.qtl.util.SPARQLEndpointEx; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import scala.actors.threadpool.Arrays; + +public class QTLTest { + + public static void main(String[] args) throws EmptyLGGException, NegativeTreeCoverageExecption, TimeOutException { + Logger.getLogger(NBR.class).setLevel(Level.DEBUG); + Logger.getLogger(PostLGG.class).setLevel(Level.DEBUG); + List<String> predicateFilters = Arrays.asList(new String[]{"http://dbpedia.org/ontology/wikiPageWikiLink", + "http://dbpedia.org/ontology/wikiPageExternalLink", "http://dbpedia.org/property/wikiPageUsesTemplate"}); + SPARQLEndpointEx endpoint = new SPARQLEndpointEx(SparqlEndpoint.getEndpointDBpediaLiveAKSW(), "", "", new 
HashSet<String>(predicateFilters)); + + QTL qtl = new QTL(endpoint, new ExtractionDBCache("cache")); + +// List<String> relevantWords = Arrays.asList(new String[]{"film", "star", "Brad Pitt"}); +// List<String> posExamples = Arrays.asList(new String[]{ +// "http://dbpedia.org/resource/Interview_with_the_Vampire:_The_Vampire_Chronicles", +// "http://dbpedia.org/resource/Megamind"}); +// List<String> negExamples = Arrays.asList(new String[]{"http://dbpedia.org/resource/Shukriya:_Till_Death_Do_Us_Apart"}); + + List<String> relevantWords = Arrays.asList(new String[]{"soccer club", "Premier League"}); + List<String> posExamples = Arrays.asList(new String[]{ + "http://dbpedia.org/resource/Arsenal_F.C.", + "http://dbpedia.org/resource/Chelsea_F.C."}); + List<String> negExamples = Arrays.asList(new String[]{}); + + QuestionBasedStatementFilter stmtFilter = new QuestionBasedStatementFilter(new HashSet<String>(relevantWords)); + qtl.addStatementFilter(stmtFilter); + + QuestionBasedQueryTreeFilterAggressive treeFilter = new QuestionBasedQueryTreeFilterAggressive(new HashSet<String>(relevantWords)); + qtl.addQueryTreeFilter(treeFilter); + + + + + String suggestion = qtl.getQuestion(posExamples, negExamples); + System.out.println(suggestion); + } + +} Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-08-02 12:42:22
|
Revision: 2975 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2975&view=rev Author: lorenz_b Date: 2011-08-02 12:42:15 +0000 (Tue, 02 Aug 2011) Log Message: ----------- Added method to get lexical answer type from template. Loading all resources with inputstreams. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/pom.xml 2011-08-02 12:42:15 UTC (rev 2975) @@ -137,6 +137,11 @@ <artifactId>ini4j</artifactId> <version>0.5.2</version> </dependency> + <dependency> + <groupId>net.didion.jwnl</groupId> + <artifactId>jwnl</artifactId> + <version>1.4.1.RC2</version> + </dependency> </dependencies> <build> <plugins> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-08-02 12:40:24 UTC (rev 2974) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,6 +1,7 @@ package org.dllearner.algorithm.tbsl.learning; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -105,7 +106,7 @@ } public SPARQLTemplateBasedLearner(String optionsFile) throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(new Options(new FileReader(new File(optionsFile)))); + this(new Options(new FileInputStream(optionsFile))); } public SPARQLTemplateBasedLearner(Options options){ @@ -141,6 +142,7 @@ maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); System.setProperty("wordnet.database.dir", wordnetPath); } @@ -735,7 +737,8 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Give me all books written by authors influenced by Ernest Hemingway."; +// String question = "Give me all books written by authors influenced by Ernest Hemingway."; + String question = "Give me all cities in Canada."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); @@ -743,6 +746,7 @@ learner.setQuestion(question); learner.learnSPARQLQueries(); System.out.println(learner.getBestSPARQLQuery()); + System.out.println(learner.getTemplates().iterator().next().getLexicalAnswerType()); } Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,8 +1,9 @@ package org.dllearner.algorithm.tbsl.ltag.parser; import java.io.BufferedReader; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; @@ -27,13 +28,13 @@ */ public class LTAG_Lexicon_Constructor { - public LTAGLexicon construct(List<String> fileNames) { + public LTAGLexicon construct(List<InputStream> fileStreams) { LTAGLexicon G = new TAG(); - for (String fileName : fileNames) { + for (InputStream fileStream : fileStreams) { - addFileToGrammar(fileName, G); + addFileToGrammar(fileStream, G); } @@ -41,7 +42,7 @@ } - public void addFileToGrammar(String fileName, LTAGLexicon g) { + public void addFileToGrammar(InputStream fileStream, LTAGLexicon g) { ArrayList<Pair<String, TreeNode>> trees = new ArrayList<Pair<String, TreeNode>>(); ArrayList<List<String>> semantics = new ArrayList<List<String>>(); @@ -50,7 +51,7 @@ try { - BufferedReader in = new BufferedReader(new FileReader(fileName)); + BufferedReader in = new BufferedReader(new InputStreamReader(fileStream)); String zeile = null; int lineNo = 0; @@ -99,7 +100,7 @@ } catch (ParseException e) { System.err.println("ParseException in '" - + fileName.substring(fileName.lastIndexOf("/") + 1) + + fileStream + "' at Line " + lineNo + ": '" + items[1].trim() + "'."); continue; @@ -113,11 +114,7 @@ in.close(); } catch (IOException e) { - - System.err.println("IOException: File '" + fileName - + "' not found!"); - return; - + 
e.printStackTrace(); } g.addTrees(trees, semantics); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.StringReader; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -19,7 +20,8 @@ public StanfordPartOfSpeechTagger(){ try { // String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); - String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getPath(); + String modelPath = getClass().getResource("/tbsl/models/bidirectional-distsim-wsj-0-18.tagger").getPath(); +// String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getFile(); tagger = new MaxentTagger(modelPath); } catch (IOException e) { e.printStackTrace(); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -0,0 +1,74 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.ArrayList; +import java.util.List; + +import net.didion.jwnl.JWNL; +import net.didion.jwnl.JWNLException; +import net.didion.jwnl.data.IndexWord; +import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.Synset; +import net.didion.jwnl.data.Word; +import net.didion.jwnl.dictionary.Dictionary; + +public 
class WordNet { + + private Dictionary dict; + + public WordNet() { + try { + JWNL.initialize(WordNet.class.getClassLoader().getResourceAsStream("tbsl/wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + + public List<String> getBestSynonyms(POS pos, String s) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + if(iw != null){ + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for(Word w : words){ + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { + synonyms.add(c); + } + } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + + public List<String> getAttributes(String s) { + + List<String> result = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); + if(iw != null){ + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for(Word w : words){ + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { + result.add(c); + } + } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + + return result; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -473,6 +473,19 @@ return true; } - + /** + * Returns the variable in the SPARQL 
query, which determines the type of the answer + * by an rdf:type property. + * @return + */ + public String getAnswerTypeVariable(){ + SPARQL_Term selection = selTerms.iterator().next(); + for(SPARQL_Triple t : conditions){ + if(t.getVariable().equals(selection) && t.getProperty().getName().equals("type")){ + return t.getValue().getName(); + } + } + return null; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -38,4 +38,14 @@ return slots; } + public List<String> getLexicalAnswerType(){ + String variable = query.getAnswerTypeVariable(); + for(Slot slot : slots){ + if(slot.getAnchor().equals(variable)){ + return slot.getWords(); + } + } + return null; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,7 +1,7 @@ package org.dllearner.algorithm.tbsl.templator; import java.io.BufferedReader; -import java.io.FileReader; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -370,7 +370,7 @@ BufferedReader in; try { - in = new BufferedReader(new FileReader("src/main/resources/tbsl/lexicon/adj_list.txt")); + in = new BufferedReader(new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream("tbsl/lexicon/adj_list.txt"))); String line; while ((line 
= in.readLine()) != null ) { if (line.contains(adj)) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.templator; +import java.io.InputStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -33,9 +34,9 @@ boolean UNTAGGED_INPUT = true; public Templator() { - List<String> grammarFiles = new ArrayList<String>(); + List<InputStream> grammarFiles = new ArrayList<InputStream>(); for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResource(GRAMMAR_FILES[i]).getPath()); + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); } g = LTAG_Constructor.construct(grammarFiles); Added: trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml =================================================================== --- trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml 2011-08-02 12:42:15 UTC (rev 2975) @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jwnl_properties language="en"> + <version publisher="Princeton" number="3.0" language="en"/> + <dictionary class="net.didion.jwnl.dictionary.FileBackedDictionary"> + <param name="morphological_processor" value="net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor"> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" 
value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + <param value="net.didion.jwnl.dictionary.morph.TokenizerOperation"> + <param name="delimiters"> + <param value=" "/> + <param value="-"/> + </param> + <param name="token_operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + </param> + </param> + </param> + </param> + <param name="dictionary_element_factory" value="net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory"/> + <param name="file_manager" value="net.didion.jwnl.dictionary.file_manager.FileManagerImpl"> + <param name="file_type" value="net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile"/> + <param name="dictionary_path" value="src/main/resources/tbsl/dict"/> + </param> + </dictionary> + <resource class="PrincetonResource"/> +</jwnl_properties> \ No newline at end of file Property changes on: trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-08-04 14:13:05
|
Revision: 2995 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2995&view=rev Author: lorenz_b Date: 2011-08-04 14:12:59 +0000 (Thu, 04 Aug 2011) Log Message: ----------- Updated pom. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-08-04 12:27:55 UTC (rev 2994) +++ trunk/components-ext/pom.xml 2011-08-04 14:12:59 UTC (rev 2995) @@ -49,6 +49,14 @@ <artifactId>jamon</artifactId> </dependency> <dependency> + <groupId>org.aksw.commons</groupId> + <artifactId>sparql</artifactId> + </dependency> + <dependency> + <groupId>org.aksw.commons</groupId> + <artifactId>sparql-scala</artifactId> + </dependency> + <dependency> <groupId>org.apache.solr</groupId> <artifactId>solr-core</artifactId> <version>3.3.0</version> Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java 2011-08-04 12:27:55 UTC (rev 2994) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java 2011-08-04 14:12:59 UTC (rev 2995) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.qtl; +import java.util.Arrays; import java.util.HashSet; import java.util.List; @@ -16,7 +17,6 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; -import scala.actors.threadpool.Arrays; public class QTLTest { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2011-10-23 13:50:41
|
Revision: 3316 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3316&view=rev Author: kurzum Date: 2011-10-23 13:50:34 +0000 (Sun, 23 Oct 2011) Log Message: ----------- example of OLiA mapping Modified Paths: -------------- trunk/components-ext/pom.xml Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-10-20 18:58:19 UTC (rev 3315) +++ trunk/components-ext/pom.xml 2011-10-23 13:50:34 UTC (rev 3316) @@ -1,184 +1,190 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> - <groupId>org.dllearner</groupId> - <artifactId>components-ext</artifactId> - <packaging>jar</packaging> + <groupId>org.dllearner</groupId> + <artifactId>components-ext</artifactId> + <packaging>jar</packaging> - <name>components-ext</name> - <url>http://aksw.org/Projects/DLLearner</url> + <name>components-ext</name> + <url>http://aksw.org/Projects/DLLearner</url> - <repositories> - <repository> - <id>Simmetrics</id> - <url>http://maven.mse.jhu.edu/m2repository/</url> - </repository> - <!-- <repository> <id>Harvard Med</id> <url>http://repo.open.med.harvard.edu/nexus/content/repositories/public/</url> - </repository> --> - <repository> - <id>Annolab</id> - <url>http://annolab.org/m2/releases/</url> - </repository> - <!-- <repository> <id>Mirror for WSTX</id> <url>http://mirrors.ibiblio.org/pub/mirrors/maven2/</url> - </repository> --> - </repositories> + <repositories> + <repository> + <id>Simmetrics</id> + <url>http://maven.mse.jhu.edu/m2repository/</url> + 
</repository> + <!-- <repository> <id>Harvard Med</id> <url>http://repo.open.med.harvard.edu/nexus/content/repositories/public/</url> + </repository> --> + <repository> + <id>Annolab</id> + <url>http://annolab.org/m2/releases/</url> + </repository> + <!-- <repository> <id>Mirror for WSTX</id> <url>http://mirrors.ibiblio.org/pub/mirrors/maven2/</url> + </repository> --> + </repositories> - <parent> - <groupId>org.dllearner</groupId> - <artifactId>dllearner-parent</artifactId> - <version>1.0-SNAPSHOT</version> - </parent> - <dependencies> - <dependency> - <groupId>org.dllearner</groupId> - <artifactId>components-core</artifactId> - <exclusions> - <exclusion> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - </exclusion> - <exclusion> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>com.jamonapi</groupId> - <artifactId>jamon</artifactId> - </dependency> - <dependency> - <groupId>org.aksw.commons</groupId> - <artifactId>sparql</artifactId> - </dependency> - <dependency> - <groupId>org.apache.solr</groupId> - <artifactId>solr-core</artifactId> - <version>3.3.0</version> - <type>jar</type> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - <version>1.6.0</version> - </dependency> - <dependency> - <groupId>edu.stanford</groupId> - <artifactId>postagger</artifactId> - <version>3.0.2</version> - </dependency> - <dependency> - <groupId>lbj</groupId> - <artifactId>library</artifactId> - <version>1.0</version> - </dependency> - <dependency> - <groupId>lbj</groupId> - <artifactId>core</artifactId> - <version>1.0</version> - </dependency> - <dependency> - <groupId>lbj</groupId> - <artifactId>ner</artifactId> - <version>1.0</version> - </dependency> - <dependency> - <groupId>jaws</groupId> - <artifactId>core</artifactId> - <version>1.0</version> - </dependency> - <dependency> - 
<groupId>uk.ac.shef.wit</groupId> - <artifactId>simmetrics</artifactId> - <version>1.6.2</version> - <type>jar</type> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>stax</groupId> - <artifactId>stax-api</artifactId> - <version>1.0.1</version> - <type>jar</type> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>woodstox</groupId> - <artifactId>wstx-api</artifactId> - <version>3.2.0</version> - <type>jar</type> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-tools</artifactId> - <version>1.5.1-incubating</version> - </dependency> - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-maxent</artifactId> - <version>3.0.1-incubating</version> - </dependency> - <dependency> - <groupId>com.aliasi</groupId> - <artifactId>lingpipe</artifactId> - <version>4.0.1</version> - </dependency> - <dependency> - <groupId>org.annolab.tt4j</groupId> - <artifactId>org.annolab.tt4j</artifactId> - <version>1.0.14</version> - </dependency> + <parent> + <groupId>org.dllearner</groupId> + <artifactId>dllearner-parent</artifactId> + <version>1.0-SNAPSHOT</version> + </parent> + <dependencies> + <dependency> + <groupId>org.dllearner</groupId> + <artifactId>components-core</artifactId> + <exclusions> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.jamonapi</groupId> + <artifactId>jamon</artifactId> + </dependency> + <dependency> + <groupId>org.aksw.commons</groupId> + <artifactId>sparql</artifactId> + </dependency> + <dependency> + <groupId>org.apache.solr</groupId> + <artifactId>solr-core</artifactId> + <version>3.3.0</version> + <type>jar</type> 
+ <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>1.6.0</version> + </dependency> + <dependency> + <groupId>edu.stanford</groupId> + <artifactId>postagger</artifactId> + <version>3.0.2</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>library</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>core</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>lbj</groupId> + <artifactId>ner</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>jaws</groupId> + <artifactId>core</artifactId> + <version>1.0</version> + </dependency> + <dependency> + <groupId>uk.ac.shef.wit</groupId> + <artifactId>simmetrics</artifactId> + <version>1.6.2</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>stax</groupId> + <artifactId>stax-api</artifactId> + <version>1.0.1</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>woodstox</groupId> + <artifactId>wstx-api</artifactId> + <version>3.2.0</version> + <type>jar</type> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-tools</artifactId> + <version>1.5.1-incubating</version> + </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-maxent</artifactId> + <version>3.0.1-incubating</version> + </dependency> + <dependency> + <groupId>com.aliasi</groupId> + <artifactId>lingpipe</artifactId> + <version>4.0.1</version> + </dependency> + <dependency> + <groupId>org.annolab.tt4j</groupId> + <artifactId>org.annolab.tt4j</artifactId> + <version>1.0.14</version> + </dependency> + <dependency> + <groupId>org.ini4j</groupId> + 
<artifactId>ini4j</artifactId> + <version>0.5.2</version> + </dependency> + <dependency> + <groupId>net.didion.jwnl</groupId> + <artifactId>jwnl</artifactId> + <version>1.4.1.RC2</version> + </dependency> + <dependency> + <groupId>org.nlp2rdf</groupId> + <artifactId>nif</artifactId> + <version>1.1-SNAPSHOT</version> + </dependency> - <dependency> - <groupId>org.ini4j</groupId> - <artifactId>ini4j</artifactId> - <version>0.5.2</version> - </dependency> - <dependency> - <groupId>net.didion.jwnl</groupId> - <artifactId>jwnl</artifactId> - <version>1.4.1.RC2</version> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <excludes> - <exclude>org/dllearner/algorithm/tbsl/*</exclude> - <exclude>org/dllearner/algorithm/qtl/*</exclude> - </excludes> - </configuration> - </plugin> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>exec-maven-plugin</artifactId> - <version>1.2</version> - <executions> - <execution> - <goals> - <goal>exec</goal> - </goals> - </execution> - </executions> - <configuration> - <executable>java</executable> - <arguments> - <argument>-Xms512m</argument> - <argument>-Xmx4000m</argument> - </arguments> - </configuration> - </plugin> - </plugins> - </build> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <excludes> + <exclude>org/dllearner/algorithm/tbsl/*</exclude> + <exclude>org/dllearner/algorithm/qtl/*</exclude> + </excludes> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>exec-maven-plugin</artifactId> + <version>1.2</version> + <executions> + + <execution> + <goals> + <goal>exec</goal> + </goals> + </execution> + </executions> + <configuration> + <executable>java</executable> + <arguments> + <argument>-Xms512m</argument> + 
<argument>-Xmx4000m</argument> + </arguments> + </configuration> + </plugin> + </plugins> + </build> </project> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java 2011-10-23 13:50:34 UTC (rev 3316) @@ -0,0 +1,42 @@ +package org.dllearner.algorithm.tbsl.util; + +import com.hp.hpl.jena.ontology.OntClass; +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import org.apache.log4j.Logger; +import org.nlp2rdf.ontology.olia.OLiAManager; +import org.nlp2rdf.ontology.olia.OLiAOntology; + +import java.util.Set; + +/** + */ +public class NifExamples { + private static Logger log = Logger.getLogger(NifExamples.class); + + public static void main(String[] args) { + OLiAManager m = new OLiAManager(); + OLiAOntology brown = m.getOLiAOntology("http://purl.org/olia/brown-link.rdf"); + + System.out.println(brown); + String posTag = "BED"; + String oliaIndividual = null; + if ((oliaIndividual = brown.getIndividualURIForTag(posTag)) != null) { + log.info("The OLia Annotation individual can be null, if the ontology has a gap"); + log.info(oliaIndividual + ""); + } + //adding pos classes from olia and olia-top + Set<String> classes = brown.getClassURIsForTag(posTag); + log.info("Classes found for the POS tag " + posTag); + log.info("" + classes); + + for (String classUri : classes) { + log.info("found: " + classUri + " for: " + posTag); + OntModel hierarchy = brown.getHierarchy(classUri); + for (ExtendedIterator<OntClass> it = hierarchy.listClasses(); it.hasNext(); ) { + OntClass oc = it.next(); + log.info("flattended: " + oc); + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |
From: <chr...@us...> - 2012-02-06 13:14:21
|
Revision: 3568 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3568&view=rev Author: christinaunger Date: 2012-02-06 13:14:10 +0000 (Mon, 06 Feb 2012) Log Message: ----------- [tbsl] added QALD2 evaluation questions Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java Added Paths: ----------- trunk/components-ext/questions.txt trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged.xml trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml Added: trunk/components-ext/questions.txt =================================================================== --- trunk/components-ext/questions.txt (rev 0) +++ trunk/components-ext/questions.txt 2012-02-06 13:14:10 UTC (rev 3568) @@ -0,0 +1,100 @@ +1: Give me all female Russian astronauts +2: Give me the birthdays of all actors of the television show Charmed +3: Who is the daughter of Bill Clinton married to +4: Which river does the Brooklyn Bridge cross +5: How many monarchical countries are there in Europe +6: Where did Abraham Lincoln die +7: Is the wife of President Obama called Michelle +8: Which states of Germany are governed by the Social Democratic Party +9: Which US states possess gold minerals +10: In which country does the Nile start +11: Which countries have places with more than two caves +12: Is proinsulin a protein +13: Which classis does the Millepede belong to +14: How tall is Claudia Schiffer +15: Who created Goofy +16: Give me the capitals of all countries in Africa +17: Give me all cities in New Jersey with more than 100000 inhabitants +18: Which museum exhibits The Scream by Munch +19: Is Egypts largest city also its capital +20: How many employees does IBM have +21: Which states border Illinois +22: In which country is the 
Limerick Lake +23: Which television shows were created by Walt Disney +24: Which mountain is the highest after the Annapurna +25: In which films directed by Garry Marshall was Julia Roberts starring +26: Which bridges are of the same type as the Manhattan Bridge +27: Was US president Jackson involved in a war +28: Which European countries have a constitutional monarchy +29: Which awards did WikiLeaks win +30: Which state of the USA has the highest population density +31: What is the currency of the Czech Republic +32: Which countries in the European Union adopted the Euro +33: What is the area code of Berlin +34: Which countries have more than two official languages +35: Who is the owner of Universal Studios +36: Through which countries does the Yenisei river flow +37: When did Finland join the EU +38: Which monarchs of the United Kingdom were married to a German +39: When was the Battle of Gettysburg +40: What is the highest mountain in Australia +41: Give me all soccer clubs in Spain +42: What are the official languages of the Philippines +43: Who is the mayor of New York City +44: Who designed the Brooklyn Bridge +45: Which telecommunications organizations are located in Belgium +46: Is Frank Herbert still alive +47: What is the highest place of Karakoram +48: Give me the homepage of Forbes +49: Give me all companies in the advertising industry +50: What did Bruce Carver die from +51: Give me all school types +52: Which presidents were born in 1945 +53: Give me all presidents of the United States +54: Who was the wife of US president Lincoln +55: Who developed the video game World of Warcraft +56: What is the official website of Tom Cruise +57: List all episodes of the first season of the HBO television series The Sopranos +58: Who produced the most films +59: Give me all people with first name Jimmy +60: Is there a video game called Battle Chess +61: Which mountains are higher than the Nanga Parbat +62: Who created Wikipedia +63: Give me all actors starring in 
Batman Begins +64: Which software has been developed by organizations founded in California +65: Which companies work in the aerospace industry as well as on nuclear reactor technology +66: Is Christian Bale starring in Batman Begins +67: Give me the websites of companies with more than 500000 employees +68: Which actors were born in Germany +69: Which caves have more than 3 entrances +70: Give me all films produced by Hal Roach +71: Give me all video games published by Mean Hamster Software +72: Which languages are spoken in Estonia +73: Who owns Aldi +74: Which capitals in Europe were host cities of the summer olympic games +75: Who has been the 5th president of the United States of America +76: How many films did Hal Roach produce +77: Which music albums contain the song Last Christmas +78: Give me all books written by Danielle Steel +79: Which airports are located in California USA +80: Give me all Canadian Grunge record labels +81: Which country has the most official languages +82: In which programming language is GIMP written +83: Who produced films starring Natalie Portman +84: Give me all movies with Tom Cruise +85: In which films did Julia Roberts as well as Richard Gere play +86: Give me all female German chancellors +87: Who wrote the book The pillars of the Earth +88: How many films did Leonardo DiCaprio star in +89: Give me all soccer clubs in the Premier League +90: When was Capcom founded +91: Which organizations were founded in 1950 +92: What is the highest mountain +93: Was Natalie Portman born in the United States +94: Which budget did the first movie of Zdenek Sverak have +95: How many big fires struck Paris during the Middle Ages +96: Is Jens Friebe a vegan +97: How often was Michael Jordan divorced +98: What is the most beautiful painting +99: Give me all animal species that live in the Teutoburg Forest +100: When is the movie Worst Case Scenario going to be in cinemas in the Netherlands Added: 
trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml ___________________________________________________________________ Added: svn:mime-type + application/xml Added: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged.xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Added: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2012-02-06 12:40:04 UTC (rev 3567) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2012-02-06 13:14:10 UTC (rev 3568) @@ -64,7 +64,8 @@ public class Evaluation{ - List<Integer> yagoExclusions = Arrays.asList(new Integer[]{1, 3, 6, 11, 15, 22, 23, 46}); +// List<Integer> yagoExclusions = Arrays.asList(new Integer[]{1, 3, 6, 11, 15, 22, 23, 46}); + List<Integer> exclusions = Arrays.asList(new Integer[]{1,5,8,9,16,28,30,32,38,51,52,53,74,86,94,95,96,97,98,99,100}); Map<Integer, String> evalCodes = new HashMap<Integer, String>(); 
private static Logger logger = Logger.getLogger(Evaluation.class); @@ -97,8 +98,10 @@ prefixMap = new HashMap<String, String>(); prefixMap.put("rdf", RDF.getURI()); prefixMap.put("rdfs", RDFS.getURI()); - prefixMap.put("onto", "http://dbpedia.org/ontology/"); - prefixMap.put("prop", "http://dbpedia.org/property/"); +// prefixMap.put("onto", "http://dbpedia.org/ontology/"); +// prefixMap.put("prop", "http://dbpedia.org/property/"); + prefixMap.put("dbo", "http://dbpedia.org/ontology/"); + prefixMap.put("dbp", "http://dbpedia.org/property/"); prefixMap.put("res", "http://dbpedia.org/resource/"); prefixMap.put("foaf", FOAF.getURI()); prefixMap.put("yago", "http://dbpedia.org/class/yago/"); @@ -323,7 +326,7 @@ int i = 0; int cnt = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){//if(entry.getKey()==50)continue; - if((testID != -1 && entry.getKey() != testID) || (yagoExclusions.contains(entry.getKey())))continue; + if((testID != -1 && entry.getKey() != testID) || (exclusions.contains(entry.getKey()))) continue; try { questionId = entry.getKey(); question = entry.getValue(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java 2012-02-06 12:40:04 UTC (rev 3567) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java 2012-02-06 13:14:10 UTC (rev 3568) @@ -25,8 +25,8 @@ public class GoldTagger { - static String GOLD = "src/main/resources/tbsl/evaluation/dbpedia-train.xml"; - static String OUT = "target/dbpedia-train-tagged.xml"; + static String GOLD = "src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml"; + static String OUT = "target/qald2-dbpedia-train-tagged.xml"; public static void main(String[] args) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2012-04-28 14:02:51
|
Revision: 3660 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3660&view=rev Author: jenslehmann Date: 2012-04-28 14:02:44 +0000 (Sat, 28 Apr 2012) Log Message: ----------- completed previous commit Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POSTaggerEvaluation.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2012-04-28 13:50:47 UTC (rev 3659) +++ trunk/components-ext/pom.xml 2012-04-28 14:02:44 UTC (rev 3660) @@ -150,11 +150,13 @@ <artifactId>jwnl</artifactId> <version>1.4.1.RC2</version> </dependency> + <!-- <dependency> <groupId>org.nlp2rdf</groupId> <artifactId>nif</artifactId> <version>1.1</version> </dependency> + --> <dependency> <groupId>org.xerial</groupId> <artifactId>sqlite-jdbc</artifactId> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java 2012-04-28 13:50:47 UTC (rev 3659) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/NifExamples.java 2012-04-28 14:02:44 UTC (rev 3660) @@ -4,8 +4,6 @@ import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import org.apache.log4j.Logger; -import org.nlp2rdf.ontology.olia.OLiAManager; -import org.nlp2rdf.ontology.olia.OLiAOntology; import java.util.Set; @@ -15,28 +13,30 @@ private static Logger log = Logger.getLogger(NifExamples.class); public static void main(String[] args) { - OLiAManager m = new OLiAManager(); - OLiAOntology brown = m.getOLiAOntology("http://purl.org/olia/brown-link.rdf"); - - System.out.println(brown); - String posTag = "BED"; - String oliaIndividual = null; - if 
((oliaIndividual = brown.getIndividualURIForTag(posTag)) != null) { - log.info("The OLia Annotation individual can be null, if the ontology has a gap"); - log.info(oliaIndividual + ""); - } - //adding pos classes from olia and olia-top - Set<String> classes = brown.getClassURIsForTag(posTag); - log.info("Classes found for the POS tag " + posTag); - log.info("" + classes); - - for (String classUri : classes) { - log.info("found: " + classUri + " for: " + posTag); - OntModel hierarchy = brown.getHierarchy(classUri); - for (ExtendedIterator<OntClass> it = hierarchy.listClasses(); it.hasNext(); ) { - OntClass oc = it.next(); - log.info("flattended: " + oc); - } - } + // commented out since the NIF dependency was removed + +// OLiAManager m = new OLiAManager(); +// OLiAOntology brown = m.getOLiAOntology("http://purl.org/olia/brown-link.rdf"); +// +// System.out.println(brown); +// String posTag = "BED"; +// String oliaIndividual = null; +// if ((oliaIndividual = brown.getIndividualURIForTag(posTag)) != null) { +// log.info("The OLia Annotation individual can be null, if the ontology has a gap"); +// log.info(oliaIndividual + ""); +// } +// //adding pos classes from olia and olia-top +// Set<String> classes = brown.getClassURIsForTag(posTag); +// log.info("Classes found for the POS tag " + posTag); +// log.info("" + classes); +// +// for (String classUri : classes) { +// log.info("found: " + classUri + " for: " + posTag); +// OntModel hierarchy = brown.getHierarchy(classUri); +// for (ExtendedIterator<OntClass> it = hierarchy.listClasses(); it.hasNext(); ) { +// OntClass oc = it.next(); +// log.info("flattended: " + oc); +// } +// } } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POSTaggerEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POSTaggerEvaluation.java 2012-04-28 13:50:47 UTC (rev 3659) +++ 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POSTaggerEvaluation.java 2012-04-28 14:02:44 UTC (rev 3660) @@ -11,8 +11,6 @@ import org.dllearner.algorithm.tbsl.nlp.LingPipePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.nlp2rdf.ontology.olia.OLiAManager; -import org.nlp2rdf.ontology.olia.OLiAOntology; import com.aliasi.corpus.ObjectHandler; import com.aliasi.corpus.StringParser; @@ -24,8 +22,10 @@ import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.util.iterator.Filter; -public class POSTaggerEvaluation extends StringParser<ObjectHandler<Tagging<String>>>{ +// commented out because the NIF dependency was removed +public class POSTaggerEvaluation { /* extends StringParser<ObjectHandler<Tagging<String>>>{ + private List<PartOfSpeechTagger> taggers = Arrays.asList(new PartOfSpeechTagger[]{ new ApachePartOfSpeechTagger(), new StanfordPartOfSpeechTagger(), new LingPipePartOfSpeechTagger()}); @@ -44,8 +44,8 @@ public POSTaggerEvaluation() { - brown = m.getOLiAOntology("http://purl.org/olia/brown-link.rdf"); - penn = m.getOLiAOntology("http://purl.org/olia/penn-link.rdf"); +// brown = m.getOLiAOntology("http://purl.org/olia/brown-link.rdf"); +// penn = m.getOLiAOntology("http://purl.org/olia/penn-link.rdf"); } public void run(File directory){ @@ -178,9 +178,9 @@ } } - /* - * Returns TRUE if in the OLia hierarchy is somewhere a common class. - */ + // + // Returns TRUE if in the OLia hierarchy is somewhere a common class. + // private boolean matchesOLiaClass(String brownTag, String pennTag){ Set<String> brownClasses = brown.getClassURIsForTag(brownTag.toUpperCase()); Set<String> pennClasses = penn.getClassURIsForTag(pennTag); @@ -215,5 +215,5 @@ eval.run(new File(args[0])); } - +*/ } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-05-09 15:28:37
|
Revision: 3698 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3698&view=rev Author: christinaunger Date: 2012-05-09 15:28:25 +0000 (Wed, 09 May 2012) Log Message: ----------- [tbsl] repaired parser/templator shortcomings (and probably broke something else ;) Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex Added Paths: ----------- trunk/components-ext/src/main/javacc/ trunk/components-ext/src/main/javacc/DRSParser.jj trunk/components-ext/src/main/javacc/DUDE_Parser.jj trunk/components-ext/src/main/javacc/LTAG_Parser.jj Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/pom.xml 2012-05-09 15:28:25 UTC (rev 3698) @@ -165,7 +165,22 @@ </dependencies> <build> <plugins> + <!-- <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>javacc</id> + <goals> + <goal>javacc</goal> + </goals> + </execution> + </executions> + </plugin> + --> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -236,7 +236,7 @@ Pattern quotePattern2 = Pattern.compile("(``/``((.*)_)''/'')"); Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); - Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+)/NN[S]?)(\\W|$)"); m = quotePattern1.matcher(flat); while (m.find()) { @@ -265,7 +265,7 @@ } m = nnnnpPattern.matcher(flat); while (m.find()) { - flat = flat.replaceFirst(m.group(1),m.group(2) + "/JJ"); + flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3) + "/NNP" + m.group(4)); m = nnnnpPattern.matcher(flat); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -209,7 +209,7 @@ jj_consume_token(2); referent = dr(); jj_consume_token(2); - word = jj_consume_token(WORD); + word = word(); jj_consume_token(2); type = Type(); jj_consume_token(8); @@ -237,7 +237,7 @@ type.setResultType(result); {if (true) return type;} } else if (jj_2_15(2)) { - word = jj_consume_token(WORD); + word = word(); ElementaryType type=null; if 
(word.toString().equals("e")) type = new ElementaryType(ElemType.e); @@ -344,7 +344,7 @@ DRS drs1; DRS drs2; if (jj_2_29(2)) { - predicate = jj_consume_token(WORD); + predicate = word(); jj_consume_token(10); dr_list = DR_List(); jj_consume_token(8); @@ -476,9 +476,9 @@ Token type; SlotType slottype = null; List<String> words = null; - ref = jj_consume_token(WORD); + ref = word(); jj_consume_token(14); - type = jj_consume_token(WORD); + type = word(); jj_consume_token(14); if (jj_2_35(2)) { words = Word_List(); @@ -502,7 +502,7 @@ final public List<String> Word_List() throws ParseException { Token word; List<String> words = null; - word = jj_consume_token(WORD); + word = word(); if (jj_2_36(2)) { jj_consume_token(15); words = Word_List(); @@ -522,19 +522,31 @@ final public Token dr() throws ParseException { Token t; if (jj_2_37(2)) { - t = jj_consume_token(WORD); + t = jj_consume_token(A); } else if (jj_2_38(2)) { - t = jj_consume_token(DR); - } else if (jj_2_39(2)) { - t = jj_consume_token(QUOTED_STRING); + t = jj_consume_token(C); } else { jj_consume_token(-1); throw new ParseException(); } - {if (true) return t;} + {if (true) return t;} throw new Error("Missing return statement in function"); } + final public Token word() throws ParseException { + Token t; + if (jj_2_39(2)) { + t = jj_consume_token(A); + } else if (jj_2_40(2)) { + t = jj_consume_token(B); + } else { + jj_consume_token(-1); + throw new ParseException(); + } + {if (true) return t;} + throw new Error("Missing return statement in function"); + } + private boolean jj_2_1(int xla) { jj_la = xla; jj_lastpos = jj_scanpos = token; try { return !jj_3_1(); } @@ -808,6 +820,13 @@ finally { jj_save(38, xla); } } + private boolean jj_2_40(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_40(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(39, xla); } + } + private boolean jj_3_12() { if (jj_scan_token(2)) return true; if (jj_3R_1()) return true; @@ -815,12 
+834,12 @@ } private boolean jj_3_16() { - if (jj_3R_7()) return true; + if (jj_3R_8()) return true; return false; } - private boolean jj_3R_12() { - if (jj_scan_token(WORD)) return true; + private boolean jj_3R_13() { + if (jj_3R_7()) return true; Token xsp; xsp = jj_scanpos; if (jj_3_36()) jj_scanpos = xsp; @@ -834,7 +853,7 @@ } private boolean jj_3_32() { - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } @@ -848,13 +867,8 @@ return false; } - private boolean jj_3_39() { - if (jj_scan_token(QUOTED_STRING)) return true; - return false; - } - private boolean jj_3R_1() { - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } @@ -865,38 +879,58 @@ private boolean jj_3_31() { if (jj_scan_token(13)) return true; - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } - private boolean jj_3R_10() { + private boolean jj_3R_11() { if (jj_scan_token(LABEL)) return true; if (jj_scan_token(11)) return true; return false; } - private boolean jj_3_38() { - if (jj_scan_token(DR)) return true; + private boolean jj_3_40() { + if (jj_scan_token(B)) return true; return false; } - private boolean jj_3R_15() { - if (jj_scan_token(WORD)) return true; + private boolean jj_3R_16() { + if (jj_3R_7()) return true; if (jj_scan_token(14)) return true; return false; } + private boolean jj_3_38() { + if (jj_scan_token(C)) return true; + return false; + } + private boolean jj_3_23() { if (jj_scan_token(MOST)) return true; return false; } + private boolean jj_3_39() { + if (jj_scan_token(A)) return true; + return false; + } + private boolean jj_3_30() { - if (jj_3R_9()) return true; + if (jj_3R_10()) return true; if (jj_scan_token(6)) return true; return false; } + private boolean jj_3R_7() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_39()) { + jj_scanpos = xsp; + if (jj_3_40()) return true; + } + return false; + } + private boolean jj_3_34() { if (jj_scan_token(2)) return true; if (jj_3R_4()) return true; @@ -904,29 
+938,26 @@ } private boolean jj_3_37() { - if (jj_scan_token(WORD)) return true; + if (jj_scan_token(A)) return true; return false; } - private boolean jj_3R_9() { + private boolean jj_3R_10() { Token xsp; xsp = jj_scanpos; if (jj_3_37()) { jj_scanpos = xsp; - if (jj_3_38()) { - jj_scanpos = xsp; - if (jj_3_39()) return true; + if (jj_3_38()) return true; } - } return false; } private boolean jj_3R_3() { - if (jj_3R_14()) return true; + if (jj_3R_15()) return true; return false; } - private boolean jj_3R_16() { + private boolean jj_3R_17() { Token xsp; xsp = jj_scanpos; if (jj_3_29()) { @@ -943,19 +974,19 @@ } private boolean jj_3_29() { - if (jj_scan_token(WORD)) return true; + if (jj_3R_7()) return true; if (jj_scan_token(10)) return true; return false; } private boolean jj_3_19() { if (jj_scan_token(2)) return true; - if (jj_3R_8()) return true; + if (jj_3R_9()) return true; return false; } private boolean jj_3_15() { - if (jj_scan_token(WORD)) return true; + if (jj_3R_7()) return true; return false; } @@ -966,7 +997,7 @@ } private boolean jj_3R_4() { - if (jj_3R_15()) return true; + if (jj_3R_16()) return true; return false; } @@ -1009,7 +1040,7 @@ private boolean jj_3_33() { if (jj_scan_token(2)) return true; - if (jj_3R_11()) return true; + if (jj_3R_12()) return true; return false; } @@ -1031,8 +1062,8 @@ return false; } - private boolean jj_3R_8() { - if (jj_3R_16()) return true; + private boolean jj_3R_9() { + if (jj_3R_17()) return true; return false; } @@ -1051,7 +1082,7 @@ return false; } - private boolean jj_3R_13() { + private boolean jj_3R_14() { if (jj_scan_token(10)) return true; if (jj_scan_token(LABEL)) return true; return false; @@ -1063,13 +1094,13 @@ return false; } - private boolean jj_3R_11() { - if (jj_3R_9()) return true; + private boolean jj_3R_12() { + if (jj_3R_10()) return true; return false; } private boolean jj_3_17() { - if (jj_3R_8()) return true; + if (jj_3R_9()) return true; return false; } @@ -1091,19 +1122,19 @@ } private 
boolean jj_3_35() { - if (jj_3R_12()) return true; + if (jj_3R_13()) return true; return false; } private boolean jj_3_36() { if (jj_scan_token(15)) return true; - if (jj_3R_12()) return true; + if (jj_3R_13()) return true; return false; } private boolean jj_3_18() { if (jj_scan_token(2)) return true; - if (jj_3R_7()) return true; + if (jj_3R_8()) return true; return false; } @@ -1112,7 +1143,7 @@ return false; } - private boolean jj_3R_14() { + private boolean jj_3R_15() { Token xsp; xsp = jj_scanpos; if (jj_3_5()) { @@ -1137,12 +1168,12 @@ } private boolean jj_3R_2() { - if (jj_3R_13()) return true; + if (jj_3R_14()) return true; return false; } - private boolean jj_3R_7() { - if (jj_3R_9()) return true; + private boolean jj_3R_8() { + if (jj_3R_10()) return true; Token xsp; xsp = jj_scanpos; if (jj_3_18()) jj_scanpos = xsp; @@ -1173,7 +1204,7 @@ private static void jj_la1_init_1() { jj_la1_1 = new int[] {}; } - final private JJCalls[] jj_2_rtns = new JJCalls[39]; + final private JJCalls[] jj_2_rtns = new JJCalls[40]; private boolean jj_rescan = false; private int jj_gc = 0; @@ -1357,7 +1388,7 @@ /** Generate ParseException. 
*/ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[33]; + boolean[] la1tokens = new boolean[34]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -1374,7 +1405,7 @@ } } } - for (int i = 0; i < 33; i++) { + for (int i = 0; i < 34; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; @@ -1401,7 +1432,7 @@ private void jj_rescan_token() { jj_rescan = true; - for (int i = 0; i < 39; i++) { + for (int i = 0; i < 40; i++) { try { JJCalls p = jj_2_rtns[i]; do { @@ -1447,6 +1478,7 @@ case 36: jj_3_37(); break; case 37: jj_3_38(); break; case 38: jj_3_39(); break; + case 39: jj_3_40(); break; } } p = p.next; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -206,7 +206,7 @@ Type type; } { - "(" label=<LABEL> "," referent = dr() "," word=<WORD> "," type = Type() ")" + "(" label=<LABEL> "," referent = dr() "," word=word() "," type = Type() ")" { Argument argument = new Argument(); argument.setLabel(new Label(label.toString())); @@ -234,7 +234,7 @@ | - word = <WORD> + word = word() { ElementaryType type=null; if (word.toString().equals("e")) @@ -330,7 +330,7 @@ } { - predicate=<WORD> "(" dr_list=DR_List() ")" + predicate=word() "(" dr_list=DR_List() ")" { Simple_DRS_Condition condition; @@ -443,7 +443,7 @@ List<String> words = null; } { - ref = <WORD> "/" type = <WORD> "/" (words = Word_List())? + ref = word() "/" type = word() "/" (words = Word_List())? { if (words == null) { @@ -465,7 +465,7 @@ List<String> words = null; } { - word = <WORD> ("^" words = Word_List())? 
+ word = word() ("^" words = Word_List())? { if (words == null) { @@ -498,14 +498,20 @@ TOKEN: {<LABEL: "l"(["0"-"9"])+>} -TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} +//TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} +//TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} -TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} +TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9"])+>} +TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} +TOKEN: {<C: ["?","!"](["a"-"z","A"-"Z","0"-"9"])+>} +Token dr() : { Token t; }{ (t=<A> | t=<C>) { return t; } } +Token word() : { Token t; }{ (t=<A> | t=<B>) { return t; } } + +// Token label() : { Token t; }{ (t=<A> | t=<B> | t=<LABEL>) { return t; } } + TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} -Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } - SKIP : { " " | "\t" | "\n" | "\r" } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -31,11 +31,13 @@ /** RegularExpression Id. */ int LABEL = 25; /** RegularExpression Id. */ - int WORD = 26; + int A = 26; /** RegularExpression Id. */ - int DR = 27; + int B = 27; /** RegularExpression Id. */ - int QUOTED_STRING = 28; + int C = 28; + /** RegularExpression Id. */ + int QUOTED_STRING = 29; /** Lexical state. 
*/ int DEFAULT = 0; @@ -68,8 +70,9 @@ "\"NO\"", "\"HOWMANY\"", "<LABEL>", - "<WORD>", - "<DR>", + "<A>", + "<B>", + "<C>", "<QUOTED_STRING>", "\" \"", "\"\\t\"", Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -23,18 +23,15 @@ switch (pos) { case 0: - if ((active0 & 0x800L) != 0L) - { - jjmatchedKind = 26; - return 2; - } if ((active0 & 0x1ff2280L) != 0L) { jjmatchedKind = 26; - return 8; + return 4; } return -1; case 1: + if ((active0 & 0x802000L) != 0L) + return 4; if ((active0 & 0x17f0280L) != 0L) { if (jjmatchedPos != 1) @@ -42,58 +39,56 @@ jjmatchedKind = 26; jjmatchedPos = 1; } - return 8; + return 4; } - if ((active0 & 0x802000L) != 0L) - return 8; return -1; case 2: if ((active0 & 0x17f0280L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 2; - return 8; + return 4; } if ((active0 & 0x2000L) != 0L) - return 8; + return 4; return -1; case 3: if ((active0 & 0x1190200L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 3; - return 8; + return 4; } if ((active0 & 0x660000L) != 0L) - return 8; + return 4; return -1; case 4: + if ((active0 & 0x10000L) != 0L) + return 4; if ((active0 & 0x1180200L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 4; - return 8; + return 4; } - if ((active0 & 0x10000L) != 0L) - return 8; return -1; case 5: if ((active0 & 0x1180000L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 5; - return 8; + return 4; } return -1; case 6: + if ((active0 & 0x1080000L) != 0L) + return 4; if ((active0 & 0x100000L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 6; - return 8; + return 4; } - if ((active0 & 0x1080000L) != 0L) - return 8; 
return -1; default : return -1; @@ -217,7 +212,7 @@ return jjMoveStringLiteralDfa3_0(active0, 0x20000L); case 84: if ((active0 & 0x2000L) != 0L) - return jjStartNfaWithStates_0(2, 13, 8); + return jjStartNfaWithStates_0(2, 13, 4); break; case 87: return jjMoveStringLiteralDfa3_0(active0, 0x1000000L); @@ -247,7 +242,7 @@ break; case 69: if ((active0 & 0x40000L) != 0L) - return jjStartNfaWithStates_0(3, 18, 8); + return jjStartNfaWithStates_0(3, 18, 4); break; case 76: return jjMoveStringLiteralDfa4_0(active0, 0x100000L); @@ -257,15 +252,15 @@ return jjMoveStringLiteralDfa4_0(active0, 0x10000L); case 84: if ((active0 & 0x20000L) != 0L) - return jjStartNfaWithStates_0(3, 17, 8); + return jjStartNfaWithStates_0(3, 17, 4); break; case 87: if ((active0 & 0x200000L) != 0L) - return jjStartNfaWithStates_0(3, 21, 8); + return jjStartNfaWithStates_0(3, 21, 4); break; case 89: if ((active0 & 0x400000L) != 0L) - return jjStartNfaWithStates_0(3, 22, 8); + return jjStartNfaWithStates_0(3, 22, 4); break; case 112: return jjMoveStringLiteralDfa4_0(active0, 0x200L); @@ -293,7 +288,7 @@ return jjMoveStringLiteralDfa5_0(active0, 0x80000L); case 89: if ((active0 & 0x10000L) != 0L) - return jjStartNfaWithStates_0(4, 16, 8); + return jjStartNfaWithStates_0(4, 16, 4); break; case 101: return jjMoveStringLiteralDfa5_0(active0, 0x200L); @@ -343,11 +338,11 @@ return jjMoveStringLiteralDfa7_0(active0, 0x100000L); case 84: if ((active0 & 0x80000L) != 0L) - return jjStartNfaWithStates_0(6, 19, 8); + return jjStartNfaWithStates_0(6, 19, 4); break; case 89: if ((active0 & 0x1000000L) != 0L) - return jjStartNfaWithStates_0(6, 24, 8); + return jjStartNfaWithStates_0(6, 24, 4); break; default : break; @@ -367,7 +362,7 @@ { case 84: if ((active0 & 0x100000L) != 0L) - return jjStartNfaWithStates_0(7, 20, 8); + return jjStartNfaWithStates_0(7, 20, 4); break; default : break; @@ -388,7 +383,7 @@ private int jjMoveNfa_0(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 8; + jjnewStateCnt 
= 11; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -403,33 +398,27 @@ { switch(jjstateSet[--i]) { - case 0: - if ((0x3ff400000000000L & l) != 0L) + case 4: + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); + else if (curChar == 58) + jjCheckNAdd(5); + if ((0x3ff000000000000L & l) != 0L) { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - else if (curChar == 39) - jjCheckNAdd(6); - else if ((0x8000000200000000L & l) != 0L) - jjCheckNAdd(4); - if ((0x400400800000000L & l) != 0L) - { if (kind > 26) kind = 26; jjCheckNAdd(2); } break; - case 8: - if ((0x3ff400000000000L & l) != 0L) + case 0: + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); + else if (curChar == 39) + jjCheckNAdd(9); + else if ((0x8000000200000000L & l) != 0L) + jjCheckNAdd(7); + if ((0x3ff000000000000L & l) != 0L) { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - if ((0x400400800000000L & l) != 0L) - { if (kind > 26) kind = 26; jjCheckNAdd(2); @@ -443,35 +432,46 @@ jjstateSet[jjnewStateCnt++] = 1; break; case 2: - if ((0x400400800000000L & l) == 0L) + if ((0x3ff000000000000L & l) == 0L) break; if (kind > 26) kind = 26; jjCheckNAdd(2); break; case 3: - if ((0x8000000200000000L & l) != 0L) - jjCheckNAdd(4); + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); break; - case 4: - if ((0x3ff400000000000L & l) == 0L) + case 5: + if ((0x3ff400800000000L & l) == 0L) break; if (kind > 27) kind = 27; - jjCheckNAdd(4); + jjCheckNAdd(5); break; - case 5: - if (curChar == 39) - jjCheckNAdd(6); - break; case 6: - if ((0xffffff7fffffffffL & l) != 0L) - jjCheckNAddTwoStates(6, 7); + if ((0x8000000200000000L & l) != 0L) + jjCheckNAdd(7); break; case 7: - if (curChar == 39 && kind > 28) + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 28) kind = 28; + jjCheckNAdd(7); break; + case 8: + if (curChar == 39) + jjCheckNAdd(9); + break; + case 9: + if ((0xffffff7fffffffffL & l) != 0L) + jjCheckNAddTwoStates(9, 10); + break; + case 10: 
+ if (curChar == 39 && kind > 29) + kind = 29; + break; default : break; } } while(i != startsAt); @@ -483,51 +483,54 @@ { switch(jjstateSet[--i]) { - case 0: + case 4: if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + if ((0x7fffffe07fffffeL & l) != 0L) { if (kind > 26) kind = 26; jjCheckNAdd(2); } - if ((0x7fffffe07fffffeL & l) != 0L) - { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - if (curChar == 108) - jjstateSet[jjnewStateCnt++] = 1; break; - case 8: + case 0: if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + if ((0x7fffffe07fffffeL & l) != 0L) { if (kind > 26) kind = 26; jjCheckNAdd(2); } - if ((0x7fffffe07fffffeL & l) != 0L) - { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } + if (curChar == 108) + jjstateSet[jjnewStateCnt++] = 1; break; case 2: - if ((0x7fffffe87fffffeL & l) == 0L) + if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 26) kind = 26; jjCheckNAdd(2); break; - case 4: - if ((0x7fffffe07fffffeL & l) == 0L) + case 3: + if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + break; + case 5: + if ((0x7fffffe87fffffeL & l) == 0L) break; if (kind > 27) kind = 27; - jjCheckNAdd(4); + jjstateSet[jjnewStateCnt++] = 5; break; - case 6: + case 7: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 28) + kind = 28; + jjstateSet[jjnewStateCnt++] = 7; + break; + case 9: jjAddStates(0, 1); break; default : break; @@ -542,7 +545,7 @@ { switch(jjstateSet[--i]) { - case 6: + case 9: if ((jjbitVec0[i2] & l2) != 0L) jjAddStates(0, 1); break; @@ -557,14 +560,14 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 8 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 11 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } } } static final int[] jjnextStates = { - 6, 7, + 9, 10, }; /** Token literal values. 
*/ @@ -573,21 +576,21 @@ "\163\143\157\160\145\50", "\50", "\72\133", "\174", "\116\117\124", "\57", "\136", "\105\126\105\122\131", "\115\117\123\124", "\123\117\115\105", "\124\110\105\115\117\123\124", "\124\110\105\114\105\101\123\124", "\101\106\105\127", "\115\101\116\131", "\116\117", -"\110\117\127\115\101\116\131", null, null, null, null, null, null, null, null, }; +"\110\117\127\115\101\116\131", null, null, null, null, null, null, null, null, null, }; /** Lexer state names. */ public static final String[] lexStateNames = { "DEFAULT", }; static final long[] jjtoToken = { - 0x1fffffffL, + 0x3fffffffL, }; static final long[] jjtoSkip = { - 0x1e0000000L, + 0x3c0000000L, }; protected SimpleCharStream input_stream; -private final int[] jjrounds = new int[8]; -private final int[] jjstateSet = new int[16]; +private final int[] jjrounds = new int[11]; +private final int[] jjstateSet = new int[22]; protected char curChar; /** Constructor. */ public DUDE_ParserTokenManager(SimpleCharStream stream){ @@ -614,7 +617,7 @@ { int i; jjround = 0x80000001; - for (i = 8; i-- > 0;) + for (i = 11; i-- > 0;) jjrounds[i] = 0x80000000; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -35,6 +35,14 @@ String type = "UNSPEC"; String slot; + /* PRONOUN HACK */ + if (pos.equals("PRP") || pos.equals("PRP$")) { + String[] pronEntry = {token, + "(DET DET:'" + token.toLowerCase() + "')", + "<x,l1,e,[ l1:[ x | ] ],[],[],[]>"}; + result.add(pronEntry); + } + /* NOUNS */ if (equalsOneOf(pos,noun)) { @@ -329,12 +337,12 @@ slot = "SLOT_" + token + "/PROPERTY/"; String[] npAdjunct = {token, "(NP NP* 
(PP P:'" + token.toLowerCase() + "' DP[pobj]))", - // "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + - "<x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; String[] vpAdjunct = {token, "(VP VP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - // "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + - "<x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; result.add(npAdjunct); result.add(vpAdjunct); } Added: trunk/components-ext/src/main/javacc/DRSParser.jj =================================================================== --- trunk/components-ext/src/main/javacc/DRSParser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/DRSParser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,259 @@ + +options { + LOOKAHEAD = 2; + CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(DRSParser) + +package org.dllearner.algorithm.tbsl.sem.drs.reader; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.*; +import org.dllearner.algorithm.tbsl.sem.util.Label; + +public class DRSParser { + + /** Main 
entry point. */ + public static void main(String args[]) throws ParseException { + DRSParser parser = new DRSParser(System.in); + parser.Input(); + } + +} + +PARSER_END(DRSParser) + +/** Root production. */ +void Input() : +{} +{ + DRS() <EOF> +} + +/** DRS */ +DRS DRS() : +{ + Set<DiscourseReferent> dr_set = null; + Set<DRS_Condition> conditions = null; + DRS drs; + Token label = null; + +} +{ + (label=<LABEL> ":")? "[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? "]" + { + if (dr_set == null) + { + dr_set = new HashSet<DiscourseReferent>(); + } + drs = new DRS(); + if (label != null) + { + drs.setLabel(label.toString()); + } + drs.setDiscourseReferents(dr_set); + if (conditions != null) + { + drs.setDRSConditions(conditions); + } + return drs; + } +} + +/** DR_Set*/ +Set<DiscourseReferent> DR_Set() : +{ + Token dr; + Set<DiscourseReferent> dr_set=null; +} +{ + dr = dr() ("," dr_set=DR_Set())? + { + if (dr_set == null) + { + dr_set= new HashSet<DiscourseReferent>(); + } + if (dr.toString().startsWith("?")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("!")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else + { + dr_set.add(new DiscourseReferent(dr.toString(),false,false)); + } + return dr_set; + } +} + +Set<DRS_Condition> Condition_List() : +{ + DRS_Condition condition= null; + Set<DRS_Condition> conditions = null; +} +{ + condition=Condition() ("," conditions=Condition_List())? 
+ { + if (conditions == null) + { + conditions = new HashSet<DRS_Condition>(); + } + conditions.add(condition); + return conditions; + } +} + +DRS_Condition Condition() : +{ + List<DiscourseReferent> dr_list; + Token dr1; + Token dr2; + Token dr; + Token predicate; + Token quantifier; + DRS drs1; + DRS drs2; +} +{ + + predicate=<WORD> "(" dr_list=DR_List() ")" + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate(predicate.toString()); + condition.setArguments(dr_list); + return condition; + } + + | + + dr1 = dr() "=" dr2 = dr() + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate("equal"); + condition.addArgument(new DiscourseReferent(dr1.toString())); + condition.addArgument(new DiscourseReferent(dr2.toString())); + return condition; + } + + | + + "NOT" drs1=DRS() + { + Negated_DRS drs = new Negated_DRS(); + drs.setDRS(drs1); + return drs; + } + + | + + drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | + quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() + { + Complex_DRS_Condition drs; + drs = new Complex_DRS_Condition(); + drs.setRestrictor(drs1); + drs.setScope(drs2); + drs.setReferent(new DiscourseReferent(dr.toString())); + + if (quantifier.toString().equals("EVERY")) {drs.setQuantifier(DRS_Quantifier.EVERY);} + if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} + if (quantifier.toString().equals("MOST")) {drs.setQuantifier(DRS_Quantifier.MOST);} + if (quantifier.toString().equals("THEMOST")) {drs.setQuantifier(DRS_Quantifier.THEMOST);} + if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} + if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} + if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} + if 
(quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} + if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + + return drs; + + } +} + +/** DR_List*/ +List<DiscourseReferent> DR_List() : +{ + Token dr; + List<DiscourseReferent> dr_list=null; +} +{ + dr = dr() ("," dr_list=DR_List())? + { + if (dr_list == null) + { + dr_list= new ArrayList<DiscourseReferent>(); + } + + if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else { + dr_list.add(0,new DiscourseReferent(dr.toString(),false,false)); + } + + return dr_list; + } +} + + +TOKEN: {<EVERY: "EVERY">} + +TOKEN: {<MOST: "MOST">} + +TOKEN: {<SOME: "SOME">} + +TOKEN: {<THEMOST: "THEMOST">} + +TOKEN: {<THELEAST: "THELEAST">} + +TOKEN: {<AFEW: "AFEW">} + +TOKEN: {<MANY: "MANY">} + +TOKEN: {<NO: "NO">} + +TOKEN: {<HOWMANY: "HOWMANY">} + +TOKEN: {<LABEL: "l"(["0"-"9"])+>} + +TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":"])+>} + +TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} + +TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} + +Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } + +SKIP : { " " | "\t" | "\n" | "\r" } + + Property changes on: trunk/components-ext/src/main/javacc/DRSParser.jj ___________________________________________________________________ Added: svn:executable + * Copied: trunk/components-ext/src/main/javacc/DUDE_Parser.jj (from rev 3694, trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj) =================================================================== --- trunk/components-ext/src/main/javacc/DUDE_Parser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/DUDE_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,513 @@ + +options { + LOOKAHEAD = 2; + 
CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(DUDE_Parser) + +package org.dllearner.algorithm.tbsl.sem.dudes.reader; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.*; +import org.dllearner.algorithm.tbsl.sem.dudes.data.*; +import org.dllearner.algorithm.tbsl.sem.util.*; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; + +public class DUDE_Parser { + + /** Main entry point. */ + public static void main(String args[]) throws ParseException { + DUDE_Parser parser = new DUDE_Parser(System.in); + parser.Input(); + } + +} + +PARSER_END(DUDE_Parser) + +/** Root production. */ +void Input() : +{} +{ + DUDE() <EOF> +} + +/** DUDE */ +Dude DUDE() : +{ + Token referent; + Token label; + Type type; + List<DRS> drs_list = null; + List<DominanceConstraint> constraints = null; + List<Argument> arg_list = null; + List<Slot> slots = null; +} +{ + "<" referent = dr() "," label=<LABEL> "," type=Type() "," "[" (drs_list=DRS_List())? "]" "," "[" (arg_list = Arg_List())? "]" + "," "[" (constraints = DC_List())? "]" "," "[" (slots = Slot_List())? 
"]" ">" + { + Dude dude = new Dude(); + dude.setReferent(referent.toString()); + dude.setType(type); + if (drs_list != null) dude.setComponents(drs_list); + dude.setLabel(new Label(label.toString())); + if (arg_list != null) dude.setArguments(arg_list); + if (constraints != null) dude.setDominanceConstraints(constraints); + if (slots != null) dude.setSlots(slots); + return dude; + } +} + +DominanceConstraint DominanceConstraint() : +{ + Label label1; + Label label2; + Token domType; + DominanceConstraint dc = null; +} +{ + label1 = Label() "<" label2 = Label() + { + dc = new DominanceConstraint(label1,label2); + return dc; + } + + | + + label1 = Label() ">" label2 = Label() + { + dc = new DominanceConstraint(label2,label1); + return dc; + } + + | + + label1 = Label() "=" label2 = Label() + { + dc = new DominanceConstraint(label1,label2); + dc.setType(DomType.equal); + return dc; + } +} + + +Label Label() : +{ + Token label; +} +{ + + label = <LABEL> + { + return new Label(label.toString()); + } + + | + + "res(" label = <LABEL> ")" + { + return new Label(label.toString(),Position.res); + } + + | + + "scope(" label = <LABEL> ")" + { + return new Label(label.toString(),Position.scope); + } +} + + +List<DominanceConstraint> DC_List() : +{ +List<DominanceConstraint> dc_list = null; +DominanceConstraint dc = null; +} +{ + dc = DominanceConstraint() ("," dc_list = DC_List())? + { + if (dc_list == null) + { + dc_list = new ArrayList<DominanceConstraint>(); + } + + dc_list.add(0,dc); + return dc_list; + } + + +} + + +List<DRS> DRS_List() : +{ + DRS drs; + List<DRS> drs_list = null; +} +{ + drs = DRS() ("," drs_list = DRS_List())? + { + if (drs_list == null) + { + drs_list = new ArrayList<DRS>(); + } + + drs_list.add(0,drs); + return drs_list; + } +} + +List<Argument> Arg_List() : +{ + Argument argument; + List<Argument> arg_list = null; +} +{ + argument = Argument() ("," arg_list = Arg_List())? 
+ { + if (arg_list == null) + { + arg_list = new ArrayList<Argument>(); + } + + arg_list.add(0,argument); + return arg_list; + } + +} + +Argument Argument() : +{ + Token label; + Token word; + Token referent; + Type type; +} +{ + "(" label=<LABEL> "," referent = dr() "," word=word() "," type = Type() ")" + { + Argument argument = new Argument(); + argument.setLabel(new Label(label.toString())); + argument.setReferent(referent.toString()); + argument.setAnchor(word.toString()); + argument.setType(type); + return argument; + } +} + +Type Type() : +{ + Type argument; + Type result; + Token word; +} +{ + "<" argument = Type() "," result = Type() ">" + { + CompositeType type = new CompositeType(); + type.setArgumentType(argument); + type.setResultType(result); + return type; + } + + | + + word = word() + { + ElementaryType type=null; + if (word.toString().equals("e")) + type = new ElementaryType(ElemType.e); + if (word.toString().equals("t")) + type = new ElementaryType(ElemType.t); + return type; + } +} + + +/** DRS */ +DRS DRS() : +{ + Set<DiscourseReferent> dr_set = null; + Set<DRS_Condition> conditions = null; + DRS drs; + Token label; + +} +{ + label=<LABEL> ":[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? "]" + { + if (dr_set == null) + { + dr_set = new HashSet<DiscourseReferent>(); + } + drs = new DRS(); + drs.setLabel(label.toString()); + drs.setDiscourseReferents(dr_set); + if (conditions != null) + drs.setDRSConditions(conditions); + return drs; + } +} + +/** DR_Set*/ +Set<DiscourseReferent> DR_Set() : +{ + Token dr; + Set<DiscourseReferent> dr_set=null; +} +{ + dr = dr() ("," dr_set=DR_Set())? 
+ { + if (dr_set == null) + { + dr_set= new HashSet<DiscourseReferent>(); + } + if (dr.toString().startsWith("?")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("!")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else + { + dr_set.add(new DiscourseReferent(dr.toString(),false,false)); + } + return dr_set; + } +} + +Set<DRS_Condition> Condition_List() : +{ + DRS_Condition condition= null; + Set<DRS_Condition> conditions = null; +} +{ + condition=Condition() ("," conditions=Condition_List())? + { + if (conditions == null) + { + conditions = new HashSet<DRS_Condition>(); + } + conditions.add(condition); + return conditions; + } +} + +DRS_Condition Condition() : +{ + List<DiscourseReferent> dr_list; + Token dr1; + Token dr2; + Token dr; + Token predicate; + Token quantifier; + DRS drs1; + DRS drs2; +} +{ + + predicate=word() "(" dr_list=DR_List() ")" + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate(predicate.toString()); + condition.setArguments(dr_list); + return condition; + } + + | + + dr1 = dr() "=" dr2 = dr() + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate("equal"); + condition.addArgument(new DiscourseReferent(dr1.toString())); + condition.addArgument(new DiscourseReferent(dr2.toString())); + return condition; + } + + | + + "NOT" drs1=DRS() + { + Negated_DRS drs = new Negated_DRS(); + drs.setDRS(drs1); + return drs; + } + + | + + drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | + quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() + { + Complex_DRS_Condition drs; + drs = new Complex_DRS_Condition(); + drs.setRestrictor(drs1); + drs.setScope(drs2); + drs.setReferent(new DiscourseReferent(dr.toString())); + + if 
(quantifier.toString().equals("EVERY")) {drs.setQuantifier(DRS_Quantifier.EVERY);} + if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} + if (quantifier.toString().equals("MOST")) {drs.setQuantifier(DRS_Quantifier.MOST);} + if (quantifier.toString().equals("THEMOST")) {drs.setQuantifier(DRS_Quantifier.THEMOST);} + if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} + if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} + if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} + if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} + if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + + return drs; + + } +} + +/** DR_List*/ +List<DiscourseReferent> DR_List() : +{ + Token dr; + List<DiscourseReferent> dr_list=null; +} +{ + dr = dr() ("," dr_list=DR_List())? + { + if (dr_list == null) + { + dr_list= new ArrayList<DiscourseReferent>(); + } + + if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else { + dr_list.add(0,new DiscourseReferent(dr.toString(),false,false)); + } + + return dr_list; + } +} + + +List<Slot> Slot_List() : +{ + Slot slot; + List<Slot> slots = null; +} +{ + slot = Slot() ("," slots = Slot_List())? + { + if (slots == null) + { + slots = new ArrayList<Slot>(); + } + + slots.add(slot); + return slots; + } +} + +Slot Slot() : +{ + Token ref; + Token type; + SlotType slottype = null; + List<String> words = null; +} +{ + ref = word() "/" type = word() "/" (words = Word_List())? 
+ { + if (words == null) + { + words = new ArrayList<String>(); + } + if (type.toString().equals("CLASS")) { slottype = SlotType.CLASS; } + else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } + else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } + else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } + else { slottype = SlotType.UNSPEC; } + + return new Slot(ref.toString(),slottype,words); + } +} + +List<String> Word_List() : +{ + Token word; + List<String> words = null; +} +{ + word = word() ("^" words = Word_List())? + { + if (words == null) + { + words = new ArrayList<String>(); + } + + words.add(0,word.toString()); + return words; + } +} + + +TOKEN: {<EVERY: "EVERY">} + +TOKEN: {<MOST: "MOST">} + +TOKEN: {<SOME: "SOME">} + +TOKEN: {<THEMOST: "THEMOST">} + +TOKEN: {<THELEAST: "THELEAST">} + +TOKEN: {<AFEW: "AFEW">} + +TOKEN: {<MANY: "MANY">} + +TOKEN: {<NO: "NO">} + +TOKEN: {<HOWMANY: "HOWMANY">} + +TOKEN: {<LABEL: "l"(["0"-"9"])+>} + +TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} + +TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} + +TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} + +Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } +Token word() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } + +SKIP : { " " | "\t" | "\n" | "\r" } + + + Added: trunk/components-ext/src/main/javacc/LTAG_Parser.jj =================================================================== --- trunk/components-ext/src/main/javacc/LTAG_Parser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/LTAG_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,212 @@ + +options { + LOOKAHEAD = 5; + CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = 
false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(LTAGTreeParser) + +package org.dllearner.algorithm.tbsl.ltag.reader; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.dllearner.algorithm.tbsl.ltag.data.*; +import org.dllearner.algorithm.tbsl.ltag.agreement.*; + +public class LTAGTreeParser { + + /** Main entry point. */ + public static void main(String args[]) throws ParseException { + LTAGTreeParser parser = new LTAGTreeParser(System.in); + parser.Input(); + } + +} + +PARSER_END(LTAGTreeParser) + +/** Root production. */ +void Input() : +{} +{ + Tree() <EOF> +} + +/** Tree */ +TreeNode Tree() : +{ + Category category; + String terminal = ""; + List<TreeNode> treelist; + Token word; + Feature feature = null; +} + +{ + // SubstNode with case constraints (e.g. DP[subj]|nom) + category = Cat() "[" word=<WORD> "]" ("{" feature=Feat() "}")? + { + SubstNode substnode = new SubstNode(word.toString(),category,feature); + return substnode; + } + + | + + // FootNode (e.g. S*) + category = Cat() "*" + { + FootNode footnode = new FootNode(category); + return footnode; + } + + | + + // FootNode with no adjunction allowed (e.g. ^S*) + "^" category = Cat() "*" + { + FootNode footnode = new FootNode(category); + footnode.setAdjConstraint(true); + return footnode; + } + + | + + // TreeNode which has case feature marked (e.g. (S|nom ...)) + "(" category = Cat() ("{" feature=Feat() "}")? treelist=TreeList() ")" + { + TreeNode tree = new Tree(); + tree.setCategory(category); + tree.setChildren(treelist); + tree.setParentForTree(); + tree.setFeature(feature); + return tree; + } + + | + + // TreeNode with no case feature an no adjunction allowed (e.g. 
(^S DP...)) + "(" "^" category = Cat() treelist=TreeList() ")" + { + TreeNode tree = new Tree(); + tree.setCategory(category); + tree.setChildren(treelist); + tree.setParentForTree(); + tree.setAdjConstraint(true); + return tree; + } + + | + + // TerminalNode with case feature (e.g. N|nom:'house') + category = Cat() ("{" feature=Feat() "}")? ":" "'" (terminal = Terminal())? "'" + { + TerminalNode node = new TerminalNode(terminal, category); + node.setCategory(category); + node.setFeature(feature); + return node; + } + +} + + +String Terminal() : +{ + Token word; + String terminal=null; +} +{ + word = <WORD> (terminal=Terminal())? + { + if (terminal != null) return word.toString() + " " + terminal; + return word.toString(); + } +} + +List<TreeNode> TreeList() : +{ + List<TreeNode> treelist = null; + TreeNode tree; +} +{ + + tree = Tree() (treelist=TreeList())? + { + if (treelist == null) + { + treelist = new ArrayList<TreeNode>(); + } + + treelist.add(0,tree); + return treelist; + } +} + + +Category Cat() : +{ + Token cat; +} +{ + cat=<CATEGORY> + { + if (cat.toString().equals("DP")) return Category.DP; + if (cat.toString().equals("NP")) return Category.NP; + if (cat.toString().equals("N")) return Category.N; + if (cat.toString().equals("S")) return Category.S; + if (cat.toString().equals("V")) return Category.V; + if (cat.toString().equals("P")) return Category.P; + if (cat.toString().equals("VP")) return Category.VP; + if (cat.toString().equals("PP")) return Category.PP; + if (cat.toString().equals("DET")) return Category.DET; + if (cat.toString().equals("WH")) return Category.WH; + if (cat.toString().equals("ADV")) return Category.ADV; + if (cat.toString().equals("ADJ")) return Category.ADJ; + if (cat.toString().equals("ADJCOMP")) return Category.ADJCOMP; + if (cat.toString().equals("PART")) return Category.PART; + if (cat.toString().equals("PUNCT")) return Category.PUNCT; + if (cat.toString().equals("CC")) return Category.CC; + if (cat.toString().equals("EX")) 
return Category.EX; + if (cat.toString().equals("NUM")) return Category.NUM; + if (cat.toString().equals("C")) return Category.C; + if (cat.toString().equals("NEG")) return Category.NEG; + } + +} + +Feature Feat() : +{ + Token raw; +} +{ + + raw=<WORD> + { + + return Feature.construct(raw.toString()); + } +} + +TOKEN: {<WORD: (["a"-"z"]|["0"-"9"]|["?"]|["-"]|["_"]|["!"]|[","]|[";"]|["."]|[":"]|["/"])+>} + +TOKEN: {<CATEGORY: (["A"-"Z"])+>} + +SKIP : { " " | "\t" | "\n" | "\r" } + + + Property changes on: trunk/components-ext/src/main/javacc/LTAG_Parser.jj ___________________________________________________________________ Added: svn:executable + * Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-09 15:28:25 UTC (rev 3698) @@ -90,7 +90,7 @@ the least || (DET DET:'the' DET:'least') || <y, l1, e, [ l1:[ | l2:[ y | ] THELEAST y l3:[|] ] ], [], [],[]> // NECESSARY "CHEAT" - highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> ;; <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/PROPERTY/height ]> + highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/PROPERTY/height ]> ;; <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> // COUNT more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count_greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> @@ -125,6 +125,7 @@ what || (DP WH:'what') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> which || (DP WH:'which') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> how many || (DP WH:'how' 
ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] HOWMANY y l3:[|] ] ], [ (l4,y,noun,<e,t>) ], [ l4=l2 ],[]> + how many || (DP WH:'how' ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ ?y | ] ], [ (l4,y,noun,<e,t>) ], [ l4=l1 ],[]> who || (DP WH:'who') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> whom || (DP WH:'whom') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> when || (S WH:'when' S[s]) || <x, l1, t, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/date ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ge...@us...> - 2012-07-06 08:24:29
|
Revision: 3773 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3773&view=rev Author: gerbsen Date: 2012-07-06 08:24:21 +0000 (Fri, 06 Jul 2012) Log Message: ----------- code for extracting properties from diadem text and made a change to newer lucene version 3.6 Modified Paths: -------------- trunk/components-ext/pom.xml Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/DiademPropertyFinder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/Word.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/WordFrequencyCounter.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2012-07-05 07:24:27 UTC (rev 3772) +++ trunk/components-ext/pom.xml 2012-07-06 08:24:21 UTC (rev 3773) @@ -43,6 +43,18 @@ <groupId>org.apache.solr</groupId> <artifactId>solr-core</artifactId> </dependency> + + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers</artifactId> + <version>3.5.0</version> + </dependency> + + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-core</artifactId> + <version>3.5.0</version> + </dependency> <!--BEGIN Logging Dependencies--> Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/DiademPropertyFinder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/DiademPropertyFinder.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/DiademPropertyFinder.java 2012-07-06 08:24:21 UTC (rev 3773) @@ -0,0 +1,31 @@ +package org.dllearner.algorithm.tbsl.diadem; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.io.FileUtils; + +import 
edu.stanford.nlp.util.StringUtils; + +/** + * + */ +public class DiademPropertyFinder { + + /** + * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + + List<String> lines = FileUtils.readLines(new File("/Users/gerb/Development/workspaces/experimental/diadem/descriptions.txt")); + String allDEscriptions = StringUtils.join(lines, " "); + + WordFrequencyCounter wfc = new WordFrequencyCounter(); + for ( Word word : wfc.getKeywordsSortedByFrequency(allDEscriptions)) { + + System.out.println(word.getWord() + ":\t" + word.getFrequency()); + } + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/Word.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/Word.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/Word.java 2012-07-06 08:24:21 UTC (rev 3773) @@ -0,0 +1,112 @@ +/** + * + */ +package org.dllearner.algorithm.tbsl.diadem; + +/** + * @author Daniel Gerber <dg...@in...> + * + */ +public class Word implements Comparable<Word> { + + public boolean isFromWikipedia() { + + return isFromWikipedia; + } + + private String word; + private int frequency; + private boolean isFromWikipedia; // Is that term extracted from a Wikipedia + // article + + public Word(String word, int frequency, boolean fromWikipedia) { + + isFromWikipedia = fromWikipedia; + this.word = word; + this.frequency = frequency; + } + + public Word(String word, int count) { + + this(word, count, false); + } + + /** + * Increases the total frequency with 1 + * + * @return The new frequency + */ + public int incrementFrequency() { + + return ++frequency; + } + + public int compareTo(Word otherWord) { + + if (this.frequency == otherWord.frequency) { + return this.word.compareTo(otherWord.word); + } + return otherWord.frequency - this.frequency; + } + + public String getWord() { + + return word; + } 
+ + public int getFrequency() { + + return frequency; + } + + @Override + public String toString() { + + return word; + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + + final int prime = 31; + int result = 1; + result = prime * result + ((word == null) ? 0 : word.hashCode()); + return result; + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Word other = (Word) obj; + if (word == null) { + if (other.word != null) + return false; + } + else + if (!word.equals(other.word)) + return false; + return true; + } + + public Word setFrequency(int i) { + + this.frequency = i; + return this; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/WordFrequencyCounter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/WordFrequencyCounter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/diadem/WordFrequencyCounter.java 2012-07-06 08:24:21 UTC (rev 3773) @@ -0,0 +1,82 @@ +package org.dllearner.algorithm.tbsl.diadem; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; +import org.apache.lucene.analysis.ngram.NGramTokenFilter; +import org.apache.lucene.analysis.shingle.ShingleFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +/** + * + * @author Daniel Gerber <dg...@in...> + * + */ +public class WordFrequencyCounter { + + private List<String> stopwords = new 
ArrayList<String>(); + public WordFrequencyCounter(){ + +// stopwords.addAll(Arrays.asList()); + } + + /** + * + * @param inputWords + * @return + */ + public ArrayList<Word> getKeywordsSortedByFrequency(String inputWords){ + + PatternAnalyzer keywordAnalyzer = PatternAnalyzer.EXTENDED_ANALYZER; + TokenStream pageTokens = keywordAnalyzer.tokenStream("", inputWords); + CharTermAttribute charTermAttribute = pageTokens.getAttribute(CharTermAttribute.class); + ArrayList<String> tokens = new ArrayList<String>(1000); + + ShingleFilter filter = new ShingleFilter(pageTokens, 2, 3); + + try{ + + while (filter.incrementToken()) { + + // we need to filter these stop words, mostly references in wikipedia + String token = charTermAttribute.toString(); + if ( token.length() > 2 && !stopwords.contains(token) ) tokens.add(token.trim()); + } + } + catch (IOException exp){ + + exp.printStackTrace(); + } + + HashMap<String,Word> map = new HashMap<String,Word>(); + for(String token : tokens){ + + Word word = map.get(token); + if ( word == null ) { + + word = new Word(token,1); + map.put(token, word); + } + else word.incrementFrequency(); + } + // sort the values by there frequency and return them + ArrayList<Word> sortedKeywordList = new ArrayList<Word>(map.values()); + Collections.sort(sortedKeywordList); + + Iterator<Word> wordsIterator = sortedKeywordList.iterator(); + while ( wordsIterator.hasNext() ) { + + Word word = wordsIterator.next(); + if ( word.getFrequency() <= 10 ) wordsIterator.remove(); + } + + return sortedKeywordList; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-20 12:44:47
|
Revision: 3801 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3801&view=rev Author: lorenz_b Date: 2012-07-20 12:44:41 +0000 (Fri, 20 Jul 2012) Log Message: ----------- Some changes for precomputing metrics. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/pom.xml 2012-07-20 12:44:41 UTC (rev 3801) @@ -53,7 +53,6 @@ <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> - <version>3.5.0</version> </dependency> <!--BEGIN Logging Dependencies--> @@ -155,7 +154,6 @@ <dependency> <groupId>org.ini4j</groupId> <artifactId>ini4j</artifactId> - <version>0.5.2</version> </dependency> <dependency> <groupId>net.didion.jwnl</groupId> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-07-20 12:44:41 UTC (rev 3801) @@ -40,6 +40,7 @@ import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.expr.E_Equals; import 
com.hp.hpl.jena.sparql.expr.E_LogicalNot; import com.hp.hpl.jena.sparql.expr.ExprVar; @@ -744,10 +745,14 @@ String uri; QuerySolution qs; + RDFNode node; while(rs.hasNext()){ qs = rs.next(); - uri = qs.getResource("x0").getURI(); - resources.add(uri); + node = qs.get("x0"); + if(node.isURIResource()){ + uri = qs.getResource("x0").getURI(); + resources.add(uri); + } } return resources; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-20 12:44:41 UTC (rev 3801) @@ -286,6 +286,10 @@ this.mappingIndex = mappingIndex; } + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + public void setKnowledgebase(Knowledgebase knowledgebase){ this.endpoint = knowledgebase.getEndpoint(); this.resourcesIndex = knowledgebase.getResourceIndex(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-20 12:44:41 UTC (rev 3801) @@ -645,7 +645,7 @@ for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(varName)){ types.add(typeTriple.getValue().getName().replace(">", "").replace("<", "")); } - for(String type : types){System.out.println(type); + for(String type : types){ metrics.getGoodness(new NamedClass(type), new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), new 
Individual(object.getName().replace(">", "").replace("<", ""))); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2012-07-20 12:44:41 UTC (rev 3801) @@ -362,25 +362,25 @@ } } - for(NamedClass cls1 : classes){ - for(NamedClass cls2 : classes){ - if(!cls1.equals(cls2)){ - log.info("Processing class " + cls1 + " and class " + cls2); - try { - getPMI(cls1, cls2); - getPMI(cls2, cls1); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - } +// for(NamedClass cls1 : classes){ +// for(NamedClass cls2 : classes){ +// if(!cls1.equals(cls2)){ +// log.info("Processing class " + cls1 + " and class " + cls2); +// try { +// getPMI(cls1, cls2); +// getPMI(cls2, cls1); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } +// } +// } log.info("Done in " + ((System.currentTimeMillis() - startTime)/1000d) + "s"); } public static void main(String[] args) { - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); - ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/cache"); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/cache2"); String NS = "http://dbpedia.org/ontology/"; String NS_Res = "http://dbpedia.org/resource/"; Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-07-20 12:43:03 UTC (rev 3800) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-07-20 12:44:41 UTC (rev 3801) @@ -1,5 
+1,4 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code -http://www.w3.org/2006/vcard/ns#locality|address, location http://purl.org/goodrelations/v1#description|description http://purl.org/goodrelations/v1#hasPrice|has price, price http://diadem.cs.ox.ac.uk/ontologies/real-estate#receptions|receptions, reception room, reception rooms \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-02-25 11:48:03
|
Revision: 3902 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3902&view=rev Author: lorenz_b Date: 2013-02-25 11:47:56 +0000 (Mon, 25 Feb 2013) Log Message: ----------- Updated Staford model loading. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2013-02-18 14:16:54 UTC (rev 3901) +++ trunk/components-ext/pom.xml 2013-02-25 11:47:56 UTC (rev 3902) @@ -91,12 +91,18 @@ <!--END Logging Dependencies--> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>1.3.3</version> + </dependency> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>1.3.3</version> + <classifier>models</classifier> + </dependency> <dependency> - <groupId>edu.stanford</groupId> - <artifactId>postagger</artifactId> - <version>3.0.2</version> - </dependency> - <dependency> <groupId>lbj</groupId> <artifactId>library</artifactId> <version>1.0</version> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java 2013-02-18 14:16:54 UTC (rev 3901) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordLemmatizer.java 2013-02-25 11:47:56 UTC (rev 3902) @@ -26,7 +26,7 @@ @Override public String stem(String word, String tag) { - return stemmer.stem(word, 
tag).word(); + return stemmer.lemma(word, tag); } @Override Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2013-02-18 14:16:54 UTC (rev 3901) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2013-02-25 11:47:56 UTC (rev 3902) @@ -1,36 +1,29 @@ package org.dllearner.algorithm.tbsl.nlp; -import java.io.IOException; -import java.io.StringReader; -import java.net.URISyntaxException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Properties; import com.aliasi.tag.Tagging; -import edu.stanford.nlp.ling.HasWord; -import edu.stanford.nlp.ling.TaggedWord; -import edu.stanford.nlp.tagger.maxent.MaxentTagger; +import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.util.CoreMap; public class StanfordPartOfSpeechTagger implements PartOfSpeechTagger{ - private static final String MODEL = "tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; + private StanfordCoreNLP pipeline; - private MaxentTagger tagger; - public StanfordPartOfSpeechTagger(){ - try { -// String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); - String modelPath = getClass().getResource("/tbsl/models/bidirectional-distsim-wsj-0-18.tagger").getPath(); -// String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getFile(); - tagger = new 
MaxentTagger(modelPath); - } catch (IOException e) { - e.printStackTrace(); - } catch (ClassNotFoundException e) { - e.printStackTrace(); - } + Properties props = new Properties(); + props.put("annotators", "tokenize, ssplit, pos"); + pipeline = new StanfordCoreNLP(props); } @Override @@ -39,68 +32,94 @@ } @Override - public String tag(String sentence) { + public String tag(String text) { String out = ""; - ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + // create an empty Annotation just with the given text + Annotation document = new Annotation(text); + + // run all Annotators on this text + pipeline.annotate(document); + + // these are all the sentences in this document + // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types + List<CoreMap> sentences = document.get(SentencesAnnotation.class); + + for(CoreMap sentence: sentences) { + for (CoreLabel token: sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = token.get(TextAnnotation.class); + // this is the POS tag of the token + String pos = token.get(PartOfSpeechAnnotation.class); + + out += " " + word + "/" + pos; + } + } - StringReader reader = new StringReader(sentence); - List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); - - if (text.size() == 1) { - tagged = tagger.tagSentence(text.get(0)); - } - - for (TaggedWord t : tagged) { - out += " " + t.toString(); - } return out.trim(); } + + @Override public List<String> tagTopK(String sentence) { return Collections.singletonList(tag(sentence)); } - public List<String> getTags(String sentence){ + public List<String> getTags(String text){ List<String> tags = new ArrayList<String>(); - ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + // create an empty Annotation just with the given text + Annotation document = new Annotation(text); + + // run all Annotators on this text + pipeline.annotate(document); + + // these are all the sentences in this 
document + // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types + List<CoreMap> sentences = document.get(SentencesAnnotation.class); + + for(CoreMap sentence: sentences) { + for (CoreLabel token: sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = token.get(TextAnnotation.class); + // this is the POS tag of the token + String pos = token.get(PartOfSpeechAnnotation.class); + + tags.add(pos); + } + } - StringReader reader = new StringReader(sentence); - List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); - - if (text.size() == 1) { - tagged = tagger.tagSentence(text.get(0)); - } - - for(TaggedWord tW : tagged){ - tags.add(tW.tag()); - } - return tags; } @Override - public Tagging<String> getTagging(String sentence){ - ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); - - StringReader reader = new StringReader(sentence); - List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); - - if (text.size() == 1) { - tagged = tagger.tagSentence(text.get(0)); - } - + public Tagging<String> getTagging(String text){ List<String> tokenList = new ArrayList<String>(); List<String> tagList = new ArrayList<String>(); - for(TaggedWord tW : tagged){ - tokenList.add(tW.word()); - tagList.add(tW.tag()); - } + // create an empty Annotation just with the given text + Annotation document = new Annotation(text); + + // run all Annotators on this text + pipeline.annotate(document); + + // these are all the sentences in this document + // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types + List<CoreMap> sentences = document.get(SentencesAnnotation.class); + + for(CoreMap sentence: sentences) { + for (CoreLabel token: sentence.get(TokensAnnotation.class)) { + // this is the text of the token + String word = token.get(TextAnnotation.class); + // this is the POS tag of the token + String pos = token.get(PartOfSpeechAnnotation.class); + + 
tokenList.add(word); + tagList.add(pos); + } + } return new Tagging<String>(tokenList, tagList); } - } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2013-02-18 14:16:54 UTC (rev 3901) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2013-02-25 11:47:56 UTC (rev 3902) @@ -9,6 +9,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.NamedClass; @@ -34,8 +35,8 @@ public SPARQLEndpointMetrics(SparqlEndpoint endpoint, ExtractionDBCache cache) { this.endpoint = endpoint; this.cache = cache; - cache.setFreshnessInMilliseconds(31536000000l); - cache.setMaxExecutionTimeInSeconds(30); + cache.setFreshnessInMilliseconds(Long.MAX_VALUE);//31536000000l); + cache.setMaxExecutionTimeInSeconds(300); this.reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); } @@ -214,6 +215,32 @@ } /** + * Returns the number of triples where the given individual is in subject position(out-going links). + * @param cls + * @return + */ + public int getOccurencesInSubjectPosition(Individual ind){ + log.trace(String.format("Computing number of occurences in subject position for %s", ind.getName())); + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {<%s> ?p ?o.}", ind.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** + * Returns the number of triples where the given individual is in object position(in-going links). 
+ * @param cls + * @return + */ + public int getOccurencesInObjectPosition(Individual ind){ + log.trace(String.format("Computing number of occurences in object position for %s", ind.getName())); + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p <%s>.}", ind.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** * Returns the number triples with the given property as predicate. * @param prop * @return @@ -394,8 +421,9 @@ } public static void main(String[] args) { - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/dbpedia_pmi_cache"); + Logger.getLogger(SPARQLEndpointMetrics.class).setLevel(Level.DEBUG); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveOpenLink(); + ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/dbpedia_pmi_cache_v2"); String NS = "http://dbpedia.org/ontology/"; String NS_Res = "http://dbpedia.org/resource/"; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2013-02-18 14:16:54 UTC (rev 3901) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2013-02-25 11:47:56 UTC (rev 3902) @@ -3,6 +3,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.net.URL; import java.util.Collections; @@ -38,9 +39,9 @@ @Override protected void setUp() throws Exception { super.setUp(); - endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); -// model = ModelFactory.createOntologyModel(); 
-// File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); + endpoint = new SparqlEndpoint(new URL("http://[2001:638:902:2010:0:168:35:138]/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + model = ModelFactory.createOntologyModel(); +// File dir = new File("/home/me/work/papers/question-answering-iswc-2012/data_v2"); // try { // for(File f : dir.listFiles()){ // if(f.isFile()){ @@ -53,6 +54,7 @@ // } // } // } +// model.write(new FileOutputStream(dir.getAbsolutePath() + "/oxford-data.ttl"), "TURTLE", null); // model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); // } catch (FileNotFoundException e) { // e.printStackTrace(); @@ -88,6 +90,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(model, resourcesIndex, classesIndex, propertiesIndex); learner.init(); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); String question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; @@ -117,9 +120,10 @@ learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); String question = "Give me all houses near a school."; - question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; - question = "Give me all Victorian houses in Oxfordshire"; - question = "Edwardian houses close to supermarket for less than 1,000,000 in Oxfordshire"; + question = "Give me all houses with more than 3 bathrooms."; + question = "houses at walking distance from a pharmacy"; +// question = "Give me all Victorian houses in Oxfordshire"; +// question = "Edwardian houses close to supermarket for less than 1,000,000 in Oxfordshire"; // question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); This was 
sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |