From: <dfl...@us...> - 2013-11-21 13:00:36
|
Revision: 4162
          http://sourceforge.net/p/dl-learner/code/4162
Author:   dfleischhacker
Date:     2013-11-21 13:00:33 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Ignore punctuation in stemmed text

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 12:57:10 UTC (rev 4161)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 13:00:33 UTC (rev 4162)
@@ -37,7 +37,10 @@
         for (Token t : this) {
             if (found) {
                 sb.append(" ");
-                sb.append(getStringForLevel(t, l));
+                String surfaceForm = getStringForLevel(t, l);
+                if (surfaceForm != null) {
+                    sb.append(surfaceForm);
+                }
             }
             else if (t == start) {
                 found = true;
@@ -55,9 +58,19 @@
             case POS_TAGGED:
                 return t.getPOSTag();
             case STEMMED:
-                return t.getStemmedForm();
+                return t.isPunctuation() ? null : t.getStemmedForm();
         }
 
         return null;
     }
+
+    public static void main(String[] args) {
+        TextDocument t = new TextDocument();
+        String s = "This is a very long, nice text for testing our new implementation of TextDocument.";
+        for (String e : s.split(" ")) {
+            t.add(new Token(e));
+        }
+
+        System.out.println(t.getRawContent());
+    }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|