From: <chr...@us...> - 2012-06-15 14:46:56
|
Revision: 3750 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3750&view=rev Author: christinaunger Date: 2012-06-15 14:46:46 +0000 (Fri, 15 Jun 2012) Log Message: ----------- [tbsl] a few more lexical extensions Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -470,7 +470,7 @@ for (DiscourseReferent dr : cond.getArguments()) { if (dr.getValue().equals(var)) { takeit = true; - for (String f : forbidden) if (f.equals(cond.getPredicate())) takeit= false; + for (String f : forbidden) if (cond.getPredicate().contains(f)) takeit= false; } } if (takeit) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -96,6 +96,7 @@ Pattern whenPattern = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern wherePattern = 
Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern adjsPattern = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)"); + Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?)"); Pattern adjnprepPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)"); @@ -217,7 +218,12 @@ while (m.find()) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); - } + } + m = adjnnpPattern.matcher(condensedstring); + while (m.find()) { + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NNP"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NNP"); + } m = adjnounPattern.matcher(condensedstring); while (m.find()) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -79,14 +79,16 @@ } // treetoken String treetoken = "N:'" + token.toLowerCase() + "'"; - if (token.trim().contains(" ")) { - String[] tokenParts = token.split(" "); - treetoken = ""; - for (String t : tokenParts) { - treetoken += " N:'" + t.toLowerCase() + "'"; - } - treetoken = treetoken.trim(); + String[] tokenParts; + if (token.trim().contains(" ")) tokenParts = token.split(" "); +// else if (token.contains("_")) tokenParts = token.split("_"); + else { tokenParts = new String[1]; tokenParts[0] = token; } + 
treetoken = ""; + for (String t : tokenParts) { + treetoken += " N:'" + t.toLowerCase() + "'"; } + treetoken = treetoken.trim(); + // if (pos.equals("NN") || pos.equals("NNS")) { /* DP */ @@ -165,7 +167,7 @@ else if(pos.equals("JJNN") && token.contains("_")) { String[] tokens = token.split("_"); String nntoken = tokens[tokens.length-1]; - String jjtoken = token.replace("SLOT_","").replace(nntoken,"").replace("_"," ").trim(); + String jjtoken = token.replace("SLOT_","").replace(nntoken,"").trim(); String slotfluent = "SLOT_" + tokenfluent + "/CLASS/" + token; String slotnn = "SLOT_" + nntoken + "/CLASS/" + nntoken; // String semantics = "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> " + @@ -302,6 +304,10 @@ String[] wasGerEntry = {token, "(S DP[comp] (VP V:'was' DP[subject] V:'" + token + "'))", "<y,l1,t,[ l1:[ | SLOT_" + token + "(y,z) ] ],[(l2,y,comp,<<e,t>,t>), (l3,z,subject,<<e,t>,t>) ],[ l2=l1, l3=l1 ],[" + symslot + "]>"}; + String[] adjEntry = {token, + "(NP ADJ:'"+token+"' NP*)", + "<x,l1,<e,t>,[ l1:[ | SLOT_description(x,y), regex(y,'"+ token +"')] ],[],[],[ SLOT_description/DATATYPEPROPERTY/description ]>"}; + result.add(adjEntry); result.add(gerEntry); result.add(wasGerEntry); } @@ -331,6 +337,8 @@ result.add(whereEntry1); result.add(whereEntry2); } + + // TODO relative clauses missing! 
} /* ADJECTIVES */ Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:46:46 UTC (rev 3750) @@ -2,12 +2,18 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> + near || (NP NP* (PP P:'near' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + nearby || (NP NP* (PP P:'nearby' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_nearby(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_nearby/OBJECTPROPERTY/near ]> + within walking distance from || (NP NP* (PP P:'within' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + within minutes of || (NP NP* (PP P:'within' (NP N:'minutes' P:'of' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + in walking distance from || (NP NP* (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ 
l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]>
  for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
  for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
- for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
+ for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
  for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
  for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>
  from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]>

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |