From: <dfl...@us...> - 2013-11-21 12:44:12
|
Revision: 4159
          http://sourceforge.net/p/dl-learner/code/4159
Author:   dfleischhacker
Date:     2013-11-21 12:44:09 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Re-implement TextDocument based on Tokens

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 12:36:47 UTC (rev 4158)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-11-21 12:44:09 UTC (rev 4159)
@@ -7,6 +7,63 @@
  *
  * @author Daniel Fleischhacker
  */
-public class TextDocument extends LinkedList<Token> {
+public class TextDocument extends LinkedList<Token> implements Document {
+    @Override
+    public String getContent() {
+        return getContentStartingAtToken(this.getFirst(), Level.STEMMED);
+    }
+    @Override
+    public String getRawContent() {
+        return getContentStartingAtToken(this.getFirst(), Level.RAW);
+    }
+
+    @Override
+    public String getPOSTaggedContent() {
+        return getContentStartingAtToken(this.getFirst(), Level.POS_TAGGED);
+    }
+
+    public static enum Level {
+        RAW,
+        POS_TAGGED,
+        STEMMED
+    }
+
+    /**
+     * Returns a string containing all tokens starting at the token {@code start} until the end of the list. The
+     * surface forms according to {@code level} are used to build the string.
+     *
+     * @param start token to start building the string at, i.e., the first token in the returned string
+     * @param l level of surface forms to use
+     * @return built string
+     */
+    public String getContentStartingAtToken(Token start, Level l) {
+        StringBuilder sb = new StringBuilder();
+        boolean found = false;
+        for (Token t : this) {
+            if (found) {
+                sb.append(" ");
+                sb.append(getStringForLevel(t, l));
+            }
+            else if (t == start) {
+                found = true;
+                sb.append(getStringForLevel(t, l));
+            }
+        }
+
+        return sb.toString();
+    }
+
+    private String getStringForLevel(Token t, Level l) {
+        switch (l) {
+            case RAW:
+                return t.getRawForm();
+            case POS_TAGGED:
+                return t.getPOSTag();
+            case STEMMED:
+                return t.getStemmedForm();
+        }
+
+        return null;
+    }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|