Revision: 13881
http://gate.svn.sourceforge.net/gate/?rev=13881&view=rev
Author: markagreenwood
Date: 2011-06-01 17:30:48 +0000 (Wed, 01 Jun 2011)
Log Message:
-----------
A new GENIA plugin which includes a sentence splitter PR. Together with the Tagger Framework, we can now host the GENIA pipeline within GATE; this plugin also contains an xgapp doing just that.
Added Paths:
-----------
gate/trunk/plugins/GENIA/
gate/trunk/plugins/GENIA/build.xml
gate/trunk/plugins/GENIA/creole.xml
gate/trunk/plugins/GENIA/genia.xgapp
gate/trunk/plugins/GENIA/src/
gate/trunk/plugins/GENIA/src/gate/
gate/trunk/plugins/GENIA/src/gate/creole/
gate/trunk/plugins/GENIA/src/gate/creole/genia/
gate/trunk/plugins/GENIA/src/gate/creole/genia/splitter/
gate/trunk/plugins/GENIA/src/gate/creole/genia/splitter/GENIASentenceSplitter.java
Added: gate/trunk/plugins/GENIA/build.xml
===================================================================
--- gate/trunk/plugins/GENIA/build.xml (rev 0)
+++ gate/trunk/plugins/GENIA/build.xml 2011-06-01 17:30:48 UTC (rev 13881)
@@ -0,0 +1,77 @@
+<project name="GENIA" basedir=".">
+ <property file="build.properties" />
+
+ <property name="gate.home" location="../.." />
+ <property name="gate.lib" location="${gate.home}/lib" />
+ <property name="src.dir" location="src" />
+ <property name="classes.dir" location="classes" />
+ <property name="jar.location" location="GENIA.jar" />
+ <property name="doc.dir" location="doc" />
+ <property name="javadoc.dir" location="${doc.dir}/javadoc" />
+
+ <!-- Compile classpath: gate.jar plus every .jar and .zip found under ${gate.lib} -->
+ <path id="compile.classpath">
+ <pathelement location="${gate.home}/bin/gate.jar" />
+ <fileset dir="${gate.lib}">
+ <include name="**/*.jar" />
+ <include name="**/*.zip" />
+ </fileset>
+ </path>
+
+ <!-- Create the output directory for the compiled classes -->
+ <target name="prepare">
+ <mkdir dir="${classes.dir}" />
+ </target>
+
+ <!-- Compile the sources in ${src.dir} into ${classes.dir} (Java 1.5 source and target) -->
+ <target name="compile" depends="prepare">
+ <javac classpathref="compile.classpath" srcdir="${src.dir}" destdir="${classes.dir}" debug="true" debuglevel="lines,source" encoding="UTF-8" source="1.5" target="1.5" />
+ </target>
+
+ <!-- Placeholder target: the resource copy below is commented out, so it currently only triggers "prepare" -->
+ <target name="resources" depends="prepare">
+ <!--<copy todir="${classes.dir}/gate/resources" includeEmptyDirs="true">
+ <fileset dir="${src.dir}/gate/resources" />
+ </copy>-->
+ </target>
+
+ <!-- Create the JAR file from the compiled classes -->
+ <target name="jar" depends="compile, resources">
+ <jar destfile="${jar.location}" update="false" basedir="${classes.dir}" />
+ </target>
+
+ <!-- Remove the generated .class files -->
+ <target name="clean.classes">
+ <delete dir="${classes.dir}" />
+ </target>
+
+ <!-- Clean up - remove .class and .jar files -->
+ <target name="clean" depends="clean.classes">
+ <delete file="${jar.location}" />
+ </target>
+
+ <!-- Build JavaDoc documentation -->
+ <target name="doc.prepare">
+ <mkdir dir="${javadoc.dir}" />
+ </target>
+
+ <target name="javadoc" depends="doc.prepare">
+ <javadoc destdir="${javadoc.dir}" packagenames="*" classpathref="compile.classpath" encoding="UTF-8" windowtitle="GENIA JavaDoc" source="1.5">
+ <sourcepath>
+ <pathelement location="${src.dir}" />
+ </sourcepath>
+ <link href="http://java.sun.com/j2se/1.5.0/docs/api/" />
+ <link href="http://gate.ac.uk/gate/doc/javadoc/" />
+ </javadoc>
+ </target>
+
+ <!-- Targets used by the main GATE build file:
+ build: build the plugin - just calls the "jar" target
+ test: run the unit tests - there are none, so this target is empty
+ distro.prepare: remove intermediate files that shouldn't be in the
+ distribution (runs "clean.classes")
+ -->
+ <target name="build" depends="jar" />
+ <target name="test" />
+ <target name="distro.prepare" depends="clean.classes" />
+</project>
Added: gate/trunk/plugins/GENIA/creole.xml
===================================================================
--- gate/trunk/plugins/GENIA/creole.xml (rev 0)
+++ gate/trunk/plugins/GENIA/creole.xml 2011-06-01 17:30:48 UTC (rev 13881)
@@ -0,0 +1,3 @@
+<CREOLE-DIRECTORY>
+ <JAR SCAN="true">GENIA.jar</JAR>
+</CREOLE-DIRECTORY>
Added: gate/trunk/plugins/GENIA/genia.xgapp
===================================================================
--- gate/trunk/plugins/GENIA/genia.xgapp (rev 0)
+++ gate/trunk/plugins/GENIA/genia.xgapp 2011-06-01 17:30:48 UTC (rev 13881)
@@ -0,0 +1,264 @@
+<gate.util.persistence.GateApplication>
+ <urlList class="gate.util.persistence.CollectionPersistence">
+ <localList>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../GENIA/</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../Tagger_Framework/</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../ANNIE/</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </localList>
+ <collectionType>java.util.ArrayList</collectionType>
+ </urlList>
+ <application class="gate.util.persistence.ConditionalSerialAnalyserControllerPersistence">
+ <strategiesList class="gate.util.persistence.CollectionPersistence">
+ <localList>
+ <gate.util.persistence.AnalyserRunningStrategyPersistence>
+ <runMode>1</runMode>
+ <featureName></featureName>
+ <featureValue></featureValue>
+ <pr class="gate.util.persistence.PRPersistence">
+ <runtimeParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap>
+ <entry>
+ <string>setsToKeep</string>
+ <gate.util.persistence.CollectionPersistence>
+ <localList/>
+ <collectionType>java.util.ArrayList</collectionType>
+ </gate.util.persistence.CollectionPersistence>
+ </entry>
+ <entry>
+ <string>document</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>annotationTypes</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>corpus</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>keepOriginalMarkupsAS</string>
+ <boolean>true</boolean>
+ </entry>
+ <entry>
+ <string>setsToRemove</string>
+ <null/>
+ </entry>
+ </localMap>
+ </runtimeParams>
+ <resourceType>gate.creole.annotdelete.AnnotationDeletePR</resourceType>
+ <resourceName>Document Reset PR</resourceName>
+ <initParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </initParams>
+ <features class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </features>
+ </pr>
+ </gate.util.persistence.AnalyserRunningStrategyPersistence>
+ <gate.util.persistence.AnalyserRunningStrategyPersistence>
+ <runMode>1</runMode>
+ <featureName></featureName>
+ <featureValue></featureValue>
+ <pr class="gate.util.persistence.LanguageAnalyserPersistence">
+ <runtimeParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap>
+ <entry>
+ <string>document</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>debug</string>
+ <boolean>false</boolean>
+ </entry>
+ <entry>
+ <string>corpus</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>splitterBinary</string>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../../../../../geniass/geniass</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </entry>
+ <entry>
+ <string>annotationSetName</string>
+ <null/>
+ </entry>
+ </localMap>
+ </runtimeParams>
+ <resourceType>gate.creole.genia.splitter.GENIASentenceSplitter</resourceType>
+ <resourceName>GENIA Sentence Splitter</resourceName>
+ <initParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </initParams>
+ <features class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </features>
+ </pr>
+ </gate.util.persistence.AnalyserRunningStrategyPersistence>
+ <gate.util.persistence.AnalyserRunningStrategyPersistence>
+ <runMode>1</runMode>
+ <featureName></featureName>
+ <featureValue></featureValue>
+ <pr class="gate.util.persistence.PRPersistence">
+ <runtimeParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap>
+ <entry>
+ <string>outputAnnotationType</string>
+ <string>Token</string>
+ </entry>
+ <entry>
+ <string>inputASName</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>regex</string>
+ <string>(.+) (.+) (.+) (.+) (.+)</string>
+ </entry>
+ <entry>
+ <string>featureMapping</string>
+ <gate.util.persistence.MapPersistence>
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap>
+ <entry>
+ <string>lemma</string>
+ <string>2</string>
+ </entry>
+ <entry>
+ <string>chunk</string>
+ <string>4</string>
+ </entry>
+ <entry>
+ <string>category</string>
+ <string>3</string>
+ </entry>
+ <entry>
+ <string>entity</string>
+ <string>5</string>
+ </entry>
+ <entry>
+ <string>string</string>
+ <string>1</string>
+ </entry>
+ </localMap>
+ </gate.util.persistence.MapPersistence>
+ </entry>
+ <entry>
+ <string>debug</string>
+ <boolean>false</boolean>
+ </entry>
+ <entry>
+ <string>updateAnnotations</string>
+ <boolean>false</boolean>
+ </entry>
+ <entry>
+ <string>taggerBinary</string>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../../../../../geniatagger-3.0.1/geniatagger</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </entry>
+ <entry>
+ <string>inputAnnotationType</string>
+ <string>Sentence</string>
+ </entry>
+ <entry>
+ <string>document</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>corpus</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>taggerDir</string>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../../../../../geniatagger-3.0.1/</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </entry>
+ <entry>
+ <string>inputTemplate</string>
+ <string>${string}</string>
+ </entry>
+ <entry>
+ <string>failOnUnmappableCharacter</string>
+ <boolean>false</boolean>
+ </entry>
+ <entry>
+ <string>outputASName</string>
+ <null/>
+ </entry>
+ <entry>
+ <string>encoding</string>
+ <string>ISO-8859-1</string>
+ </entry>
+ <entry>
+ <string>taggerFlags</string>
+ <gate.util.persistence.CollectionPersistence>
+ <localList/>
+ <collectionType>java.util.ArrayList</collectionType>
+ </gate.util.persistence.CollectionPersistence>
+ </entry>
+ </localMap>
+ </runtimeParams>
+ <resourceType>gate.taggerframework.GenericTagger</resourceType>
+ <resourceName>GENIA Tagger</resourceName>
+ <initParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap>
+ <entry>
+ <string>preProcessURL</string>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../Tagger_Framework/resources/tagger-independent/sentencestring.jape</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </entry>
+ <entry>
+ <string>postProcessURL</string>
+ <gate.util.persistence.PersistenceManager-URLHolder>
+ <urlString>$relpath$../Tagger_Framework/resources/tagger-independent/chunkandentity-annots.jape</urlString>
+ </gate.util.persistence.PersistenceManager-URLHolder>
+ </entry>
+ </localMap>
+ </initParams>
+ <features class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </features>
+ </pr>
+ </gate.util.persistence.AnalyserRunningStrategyPersistence>
+ </localList>
+ <collectionType>java.util.ArrayList</collectionType>
+ </strategiesList>
+ <prList class="gate.util.persistence.CollectionPersistence">
+ <localList>
+ <gate.util.persistence.PRPersistence reference="../../../strategiesList/localList/gate.util.persistence.AnalyserRunningStrategyPersistence/pr"/>
+ <gate.util.persistence.LanguageAnalyserPersistence reference="../../../strategiesList/localList/gate.util.persistence.AnalyserRunningStrategyPersistence[2]/pr"/>
+ <gate.util.persistence.PRPersistence reference="../../../strategiesList/localList/gate.util.persistence.AnalyserRunningStrategyPersistence[3]/pr"/>
+ </localList>
+ <collectionType>java.util.ArrayList</collectionType>
+ </prList>
+ <resourceType>gate.creole.ConditionalSerialAnalyserController</resourceType>
+ <resourceName>GENIA</resourceName>
+ <initParams class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </initParams>
+ <features class="gate.util.persistence.MapPersistence">
+ <mapType>gate.util.SimpleFeatureMapImpl</mapType>
+ <localMap/>
+ </features>
+ </application>
+</gate.util.persistence.GateApplication>
\ No newline at end of file
Added: gate/trunk/plugins/GENIA/src/gate/creole/genia/splitter/GENIASentenceSplitter.java
===================================================================
--- gate/trunk/plugins/GENIA/src/gate/creole/genia/splitter/GENIASentenceSplitter.java (rev 0)
+++ gate/trunk/plugins/GENIA/src/gate/creole/genia/splitter/GENIASentenceSplitter.java 2011-06-01 17:30:48 UTC (rev 13881)
@@ -0,0 +1,111 @@
+package gate.creole.genia.splitter;
+
+import gate.AnnotationSet;
+import gate.Factory;
+import gate.creole.AbstractLanguageAnalyser;
+import gate.creole.ExecutionException;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+import gate.util.Files;
+import gate.util.ProcessManager;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.net.URL;
+
+@CreoleResource(name = "GENIA Sentence Splitter", icon = "sentence-splitter.png")
+public class GENIASentenceSplitter extends AbstractLanguageAnalyser {
+
+ private boolean debug = false;
+
+ private String annotationSetName;
+
+ private URL splitterBinary;
+
+ private ProcessManager manager = new ProcessManager();
+
+ public Boolean getDebug() {
+ return debug;
+ }
+
+ @RunTime
+ @CreoleParameter(defaultValue = "false")
+ public void setDebug(Boolean debug) {
+ this.debug = debug;
+ }
+
+ public URL getSplitterBinary() {
+ return splitterBinary;
+ }
+
+ @RunTime
+ @CreoleParameter()
+ public void setSplitterBinary(URL splitterBinary) {
+ this.splitterBinary = splitterBinary;
+ }
+
+ public String getAnnotationSetName() {
+ return annotationSetName;
+ }
+
+ @RunTime
+ @Optional
+ @CreoleParameter()
+ public void setAnnotationSetName(String annotationSetName) {
+ this.annotationSetName = annotationSetName;
+ }
+
+ public void execute() throws ExecutionException {
+ AnnotationSet annotationSet = document.getAnnotations(annotationSetName);
+
+ File splitter = Files.fileFromURL(splitterBinary);
+
+ String docContent =
+ document.getContent().toString().replace((char)160, ' ');
+
+ try {
+ File tmpIn = File.createTempFile("GENIA", ".txt");
+ File tmpOut = File.createTempFile("GENIA", ".txt");
+
+ FileOutputStream fos = new FileOutputStream(tmpIn);
+ fos.write(docContent.getBytes("utf8"));
+ fos.close();
+
+ String[] args =
+ new String[]{splitter.getAbsolutePath(), tmpIn.getAbsolutePath(),
+ tmpOut.getAbsolutePath()};
+
+ manager.runProcess(args, splitter.getParentFile(), (debug ? System.out : null), (debug ? System.err : null));
+
+ int end = 0;
+
+ BufferedReader in = new BufferedReader(new FileReader(tmpOut));
+ String sentence = in.readLine();
+ while(sentence != null) {
+
+ sentence = sentence.trim();
+
+ int start = docContent.indexOf(sentence, end);
+
+ end = start + sentence.length();
+
+ if(end > start && sentence.length() > 0) {
+ annotationSet.add((long)start, (long)end, "Sentence",
+ Factory.newFeatureMap());
+ }
+
+ sentence = in.readLine();
+ }
+
+ if (!debug && !tmpIn.delete()) tmpIn.deleteOnExit();
+ if (!debug && !tmpOut.delete()) tmpOut.deleteOnExit();
+
+ } catch(Exception ioe) {
+ throw new ExecutionException("An error occured running the splitter", ioe);
+ }
+ }
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|