From: <dfl...@us...> - 2013-11-21 13:52:48
|
Revision: 4167 http://sourceforge.net/p/dl-learner/code/4167 Author: dfleischhacker Date: 2013-11-21 13:52:45 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Remove SimpleLinguisticAnnotator Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-11-21 13:40:31 UTC (rev 4166) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-11-21 13:52:45 UTC (rev 4167) @@ -1,62 +0,0 @@ -package org.dllearner.algorithms.isle.index; - -import java.io.IOException; -import java.io.StringReader; -import java.util.HashSet; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.util.Version; -import org.dllearner.algorithms.isle.StopWordFilter; - -/** - * - * @author Jens Lehmann - * - */ -public class SimpleLinguisticAnnotator implements LinguisticAnnotator { - - private StopWordFilter stopWordFilter = new StopWordFilter(); - NGramGeneratingAnnotator nGramAnnotator = new NGramGeneratingAnnotator(2); - - @Override - public Set<Annotation> annotate(Document document) { - String s = document.getContent().trim(); - System.out.println("Document:" + s); -// s = stopWordFilter.removeStopWords(s); - Set<Annotation> annotations = new HashSet<Annotation>(); - Pattern pattern = Pattern.compile("(\\u0020)+"); - Matcher matcher = pattern.matcher(s); - // Check all occurrences - int start = 0; - while (matcher.find()) { - int end = matcher.start(); - annotations.add(new Annotation(document, start, end - start)); - start = matcher.end(); - } - if(start < s.length()-1){ - annotations.add(new Annotation(document, start, s.length() - start)); - } - annotations.addAll(nGramAnnotator.annotate(document)); -// stopWordFilter.removeStopWordAnnotations(annotations); - return annotations; - } - - public static void main(String[] args) throws Exception { - String s = "male person least 1 child"; - Pattern pattern = Pattern.compile("(\\u0020)+"); - Matcher matcher = pattern.matcher(s); - int start = 0; - while (matcher.find()) { - int end = matcher.start(); - System.out.println(s.substring(start, end)); - start = matcher.end(); - } - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |