From: <dfl...@us...> - 2013-09-04 09:53:01
|
Revision: 4052
          http://sourceforge.net/p/dl-learner/code/4052
Author:   dfleischhacker
Date:     2013-09-04 09:52:57 +0000 (Wed, 04 Sep 2013)
Log Message:
-----------
Add n-grams to annotations from SimpleLinguisticAnnotator

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java	2013-09-04 09:45:38 UTC (rev 4051)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NGramGeneratingAnnotator.java	2013-09-04 09:52:57 UTC (rev 4052)
@@ -24,7 +24,6 @@
     public Set<Annotation> annotate(Document document) {
         String text = document.getContent();
-        Pattern legalChars = Pattern.compile("[A-Za-z]");
         // clean up all texts

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java	2013-09-04 09:45:38 UTC (rev 4051)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleLinguisticAnnotator.java	2013-09-04 09:52:57 UTC (rev 4052)
@@ -22,6 +22,7 @@
 public class SimpleLinguisticAnnotator implements LinguisticAnnotator {
     private StopWordFilter stopWordFilter = new StopWordFilter();
+    NGramGeneratingAnnotator nGramAnnotator = new NGramGeneratingAnnotator(2);
     @Override
     public Set<Annotation> annotate(Document document) {
@@ -41,6 +42,7 @@
         if(start < s.length()-1){
             annotations.add(new Annotation(document, start, s.length() - start));
         }
+        annotations.addAll(nGramAnnotator.annotate(document));
         stopWordFilter.removeStopWordAnnotations(annotations);
         return annotations;
     }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|