From: <lor...@us...> - 2013-11-21 12:25:43
|
Revision: 4156
          http://sourceforge.net/p/dl-learner/code/4156
Author:   lorenz_b
Date:     2013-11-21 12:25:40 +0000 (Thu, 21 Nov 2013)
Log Message:
-----------
Added token class.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/WordTypeComparator.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 11:54:23 UTC (rev 4155)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-11-21 12:25:40 UTC (rev 4156)
@@ -29,7 +29,7 @@
     }
 
     private void buildCleanedContent(){
-        this.content = content.toLowerCase();
+        this.content = rawContent.toLowerCase();
         this.content = this.content.replaceAll("[^a-z ]", " ");
         this.content = this.content.replaceAll("\\s{2,}", " ");
         this.content = this.content.trim();

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-11-21 12:25:40 UTC (rev 4156)
@@ -0,0 +1,74 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.index;
+
+/**
+ * Holds the raw form of a word together with its stemmed form and its POS tag.
+ * The raw form is fixed at construction time; the stemmed form and the POS tag
+ * stay {@code null} until they are assigned through their setters.
+ *
+ * @author Lorenz Buehmann
+ *
+ */
+public class Token {
+
+    private final String rawForm;
+    private String stemmedForm;
+    private String posTag;
+
+    /**
+     * Creates a token for the given raw form.
+     *
+     * @param rawForm the raw form of the word
+     */
+    public Token(String rawForm) {
+        // Only the raw form is known here; posTag is assigned later via setPOSTag.
+        this.rawForm = rawForm;
+    }
+
+    /**
+     * @return the rawForm
+     */
+    public String getRawForm() {
+        return rawForm;
+    }
+
+    /**
+     * @return the stemmedForm
+     */
+    public String getStemmedForm() {
+        return stemmedForm;
+    }
+
+    /**
+     * @return the posTag
+     */
+    public String getPOSTag() {
+        return posTag;
+    }
+
+    /**
+     * @param stemmedForm the stemmedForm to set
+     */
+    public void setStemmedForm(String stemmedForm) {
+        this.stemmedForm = stemmedForm;
+    }
+
+    /**
+     * @param posTag the posTag to set
+     */
+    public void setPOSTag(String posTag) {
+        this.posTag = posTag;
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#toString()
+     */
+    @Override
+    public String toString() {
+        return "Word: " + rawForm + "\n"
+                + "Stemmed word: " + stemmedForm + "\n"
+                + "POS tag: " + posTag;
+    }
+}

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/WordTypeComparator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/WordTypeComparator.java (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/WordTypeComparator.java 2013-11-21 12:25:40 UTC (rev 4156)
@@ -0,0 +1,29 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.index;
+
+/**
+ * Compare the word types of two given words.
+ * @author Lorenz Buehmann
+ *
+ */
+public class WordTypeComparator {
+
+    /**
+     * Returns TRUE if both POS tags are related to the same word type, else FALSE is returned.
+     * Two word types are recognized: nouns (tags starting with "NN") and verbs (tags starting
+     * with "V"). Tags of any other type never match, and neither does a null tag.
+     * @param posTag1 the POS tag of the first word, may be null
+     * @param posTag2 the POS tag of the second word, may be null
+     * @return TRUE if both tags start with "NN" or both start with "V", else FALSE
+     */
+    public static boolean sameWordType(String posTag1, String posTag2){
+        // A missing tag cannot be assigned to any word type.
+        if(posTag1 == null || posTag2 == null){
+            return false;
+        }
+        return posTag1.startsWith("NN") && posTag2.startsWith("NN") ||
+                posTag1.startsWith("V") && posTag2.startsWith("V");
+    }
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|