From: <jen...@us...> - 2013-09-03 15:55:07
|
Revision: 4038
          http://sourceforge.net/p/dl-learner/code/4038
Author:   jenslehmann
Date:     2013-09-03 15:55:04 +0000 (Tue, 03 Sep 2013)

Log Message:
-----------
simple linguistic annotator

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java

Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java 2013-09-03 15:55:04 UTC (rev 4038)
@@ -0,0 +1,29 @@
+package org.dllearner.algorithms.isle.index;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ *
+ * @author Jens Lehmann
+ *
+ */
+public class SimpleLinguisticAnnotator implements LinguisticAnnotator {
+
+    @Override
+    public Set<Annotation> annotate(Document document) {
+        String s = document.getRawContent();
+        Set<Annotation> annotations = new HashSet<Annotation>();
+        Pattern pattern = Pattern.compile(" ");
+        Matcher matcher = pattern.matcher(s);
+        // Check all occurrences
+        while (matcher.find()) {
+            annotations.add(new Annotation(document, matcher.start(),
+                    matcher.end() - matcher.start()));
+        }
+        return annotations;
+    }
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|