|
From: <mar...@us...> - 2014-04-20 09:46:57
|
Revision: 17886
http://sourceforge.net/p/gate/code/17886
Author: markagreenwood
Date: 2014-04-20 09:46:53 +0000 (Sun, 20 Apr 2014)
Log Message:
-----------
moved a test class into the test src folder where it belongs
Added Paths:
-----------
gate/trunk/src/test/gate/creole/ProfilePRs.java
Removed Paths:
-------------
gate/trunk/src/main/gate/creole/ProfilePRs.java
Deleted: gate/trunk/src/main/gate/creole/ProfilePRs.java
===================================================================
--- gate/trunk/src/main/gate/creole/ProfilePRs.java 2014-04-20 01:21:18 UTC (rev 17885)
+++ gate/trunk/src/main/gate/creole/ProfilePRs.java 2014-04-20 09:46:53 UTC (rev 17886)
@@ -1,214 +0,0 @@
-/*
- * ProfilePRs.java
- *
- * Copyright (c) 1995-2012, The University of Sheffield. See the file
- * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free
- * software, licenced under the GNU Library General Public License,
- * Version 2, June 1991 (in the distribution as file licence.html,
- * and also available at http://gate.ac.uk/gate/licence.html).
- *
- * Kalina Bontcheva, 04/10/2001
- *
- * $Id$
- */
-
-package gate.creole;
-
-import java.io.File;
-import java.util.*;
-
-import gate.*;
-import gate.creole.gazetteer.DefaultGazetteer;
-import gate.creole.orthomatcher.OrthoMatcher;
-import gate.creole.splitter.SentenceSplitter;
-import gate.creole.tokeniser.DefaultTokeniser;
-import gate.util.GateException;
-import gate.util.Out;
-import gate.util.profile.Profiler;
-//import java.text.NumberFormat;
-
-/**
- * Command-line harness that profiles a fixed pipeline of GATE processing
- * resources (PRs). Its main function:
- * <UL>
- * <LI>
- * takes either <code>-dir directory-name</code> or a list of file names
- * <LI>
- * initialises the GATE library, and creates all PRs
- * <LI>
- * for each input file (every entry of the directory, whatever its name):
- * <BR> creates a GATE document from the file
- * <BR> runs the PRs on it, taking a profiler checkpoint after each one
- * <LI>
- * dumps some statistics in the end
- * </UL>
- */
-public class ProfilePRs {
-
-  /** String to print when wrong command-line args */
-  private static final String usage =
-    "usage: ProfilePRs [-dir directory-name | file(s)]";
-
-  /** Summed content size of all documents; divided by 1024 for the report */
-  private static double totalDocLength = 0;
-  /** Number of input files */
-  private static int docs = 0;
-  private static Profiler prof = new Profiler();
-  /** Content size of the largest document seen */
-  private static double maxDocLength = 0;
-
-  /**
-   * Main function.
-   *
-   * @param args either <code>-dir directory-name</code> or one or more
-   *        file names
-   * @throws Exception a GateException carrying the usage string when the
-   *         arguments are unusable; anything thrown by GATE initialisation
-   *         or by the PRs is propagated unchanged
-   */
-  public static void main(String[] args) throws Exception {
-    // say "hi"
-    Out.prln("processing command line arguments");
-
-    // check we have a directory name or list of files
-    if(args.length < 1) throw new GateException(usage);
-    List<File> inputFiles;
-    String runTitle;
-    if(args[0].equals("-dir")) { // list all the files in the dir
-      if(args.length < 2) throw new GateException(usage);
-      File dir = new File(args[1]);
-      File[] filesArray = dir.listFiles();
-      if(filesArray == null)
-        throw new GateException(
-          dir.getPath() + " is not a directory; " + usage
-        );
-      inputFiles = Arrays.asList(filesArray);
-      runTitle = "Measuring performance on directory " + args[1];
-    } else { // all args should be file names
-      inputFiles = new ArrayList<File>();
-      for(String fileName : args)
-        inputFiles.add(new File(fileName));
-      // no directory in this mode, and args[1] is absent for a single file,
-      // so the run title must not be built from args[1]
-      runTitle = "Measuring performance on " + inputFiles.size() + " file(s)";
-    }
-
-    prof.initRun(runTitle);
-
-    // initialise GATE
-    prof.checkPoint("Before GATE.init()");
-    Gate.init();
-
-    // create some processing resources, each with an empty parameter map
-    prof.checkPoint("Before creating the processing resources");
-
-    //create a default tokeniser
-    DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
-      "gate.creole.tokeniser.DefaultTokeniser", Factory.newFeatureMap());
-    prof.checkPoint("Tokeniser initialised");
-
-    //create a default gazetteer
-    DefaultGazetteer gaz = (DefaultGazetteer) Factory.createResource(
-      "gate.creole.gazetteer.DefaultGazetteer", Factory.newFeatureMap());
-    prof.checkPoint("Gazetteer initialised");
-
-    //create a splitter
-    SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
-      "gate.creole.splitter.SentenceSplitter", Factory.newFeatureMap());
-    prof.checkPoint("Sentence splitter initialised");
-
-    //create a tagger
-    POSTagger tagger = (POSTagger) Factory.createResource(
-      "gate.creole.POSTagger", Factory.newFeatureMap());
-    prof.checkPoint("POSTagger initialised");
-
-    //create a grammar
-    ANNIETransducer transducer = (ANNIETransducer) Factory.createResource(
-      "gate.creole.ANNIETransducer", Factory.newFeatureMap());
-    prof.checkPoint("Grammars initialised");
-
-    //create an orthomatcher
-    OrthoMatcher orthomatcher = (OrthoMatcher) Factory.createResource(
-      "gate.creole.orthomatcher.OrthoMatcher", Factory.newFeatureMap());
-    prof.checkPoint("Orthomatcher initialised");
-
-    // for each document
-    //   create a gate doc
-    //   set as the document for the PRs
-    //   run the PRs, with a profiler checkpoint after each
-    //   delete the doc
-    Out.prln("\nLooping on input files list");
-    docs = inputFiles.size();
-    int fileNo = 0;
-    for(File inFile : inputFiles) {
-      fileNo++;
-
-      // set the source URL parameter to a "file:..." URL string; a fresh
-      // map per document, so no PR's creation parameters are reused
-      FeatureMap docParams = Factory.newFeatureMap();
-      docParams.put(Document.DOCUMENT_URL_PARAMETER_NAME,
-        inFile.toURI().toURL().toExternalForm());
-      docParams.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
-
-      // create the document
-      Document doc = (Document) Factory.createResource(
-        "gate.corpora.DocumentImpl", docParams);
-      long docLength = doc.getContent().size().longValue();
-      totalDocLength += docLength;
-      if(maxDocLength < docLength) maxDocLength = docLength;
-
-      // run the tokeniser
-      tokeniser.setDocument(doc);
-      prof.checkPoint("Processing file " + inFile.getPath() +
-        ", #" + fileNo + "/" + docs, new String[0], true, false, false);
-      tokeniser.execute();
-      prof.checkPoint("", new String[] {"Tokenizer", "Processing"}, false, false, false);
-
-      //run gazetteer
-      gaz.setDocument(doc);
-      gaz.execute();
-      // "Gazettier" (sic) must stay spelled like the printCategAvg key below
-      prof.checkPoint("", new String[] {"Gazettier", "Processing"}, false, false, false);
-
-      //run splitter
-      splitter.setDocument(doc);
-      splitter.execute();
-      prof.checkPoint("", new String[] {"Splitter", "Processing"}, false, false, false);
-
-      //run the tagger
-      tagger.setDocument(doc);
-      tagger.execute();
-      prof.checkPoint("", new String[] {"Tagger", "Processing"}, false, false, false);
-
-      //run the transducer
-      transducer.setDocument(doc);
-      transducer.execute();
-      prof.checkPoint("", new String[] {"JAPE grammars", "Processing"}, false, false, false);
-
-      // run the orthomatcher
-      orthomatcher.setDocument(doc);
-      orthomatcher.execute();
-      prof.checkPoint("", new String[] {"Orthomatcher", "Processing"}, false, false, false);
-
-      // make the doc a candidate for garbage collection
-      Factory.deleteResource(doc);
-    } // input files loop
-
-    prof.checkPoint("Done!");
-
-    totalDocLength = totalDocLength/1024;
-    Out.prln("\nTotal KBytes processed: " + (long)totalDocLength);
-    Out.prln("\nMax document size in bytes: " + (long)maxDocLength +
-      " (" + (long) maxDocLength/1024 + " Kb)");
-
-    prof.printCategAvg("Processing", docs, totalDocLength, "kb");
-    prof.printCategAvg("Tokenizer", docs, totalDocLength, "kb");
-    prof.printCategAvg("Gazettier", docs, totalDocLength, "kb");
-    prof.printCategAvg("Splitter", docs, totalDocLength, "kb");
-    prof.printCategAvg("Tagger", docs, totalDocLength, "kb");
-    prof.printCategAvg("JAPE grammars", docs, totalDocLength, "kb");
-    prof.printCategAvg("Orthomatcher", docs, totalDocLength, "kb");
-  } // main
-} // class ProfilePRs
Copied: gate/trunk/src/test/gate/creole/ProfilePRs.java (from rev 17885, gate/trunk/src/main/gate/creole/ProfilePRs.java)
===================================================================
--- gate/trunk/src/test/gate/creole/ProfilePRs.java (rev 0)
+++ gate/trunk/src/test/gate/creole/ProfilePRs.java 2014-04-20 09:46:53 UTC (rev 17886)
@@ -0,0 +1,214 @@
+/*
+ * ProfilePRs.java
+ *
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Kalina Bontcheva, 04/10/2001
+ *
+ * $Id$
+ */
+
+package gate.creole;
+
+import java.io.File;
+import java.util.*;
+
+import gate.*;
+import gate.creole.gazetteer.DefaultGazetteer;
+import gate.creole.orthomatcher.OrthoMatcher;
+import gate.creole.splitter.SentenceSplitter;
+import gate.creole.tokeniser.DefaultTokeniser;
+import gate.util.GateException;
+import gate.util.Out;
+import gate.util.profile.Profiler;
+//import java.text.NumberFormat;
+
+/**
+ * Command-line harness that profiles a fixed pipeline of GATE processing
+ * resources (PRs). Its main function:
+ * <UL>
+ * <LI>
+ * takes either <code>-dir directory-name</code> or a list of file names
+ * <LI>
+ * initialises the GATE library, and creates all PRs
+ * <LI>
+ * for each input file (every entry of the directory, whatever its name):
+ * <BR> creates a GATE document from the file
+ * <BR> runs the PRs on it, taking a profiler checkpoint after each one
+ * <LI>
+ * dumps some statistics in the end
+ * </UL>
+ */
+public class ProfilePRs {
+
+  /** String to print when wrong command-line args */
+  private static final String usage =
+    "usage: ProfilePRs [-dir directory-name | file(s)]";
+
+  /** Summed content size of all documents; divided by 1024 for the report */
+  private static double totalDocLength = 0;
+  /** Number of input files */
+  private static int docs = 0;
+  private static Profiler prof = new Profiler();
+  /** Content size of the largest document seen */
+  private static double maxDocLength = 0;
+
+  /**
+   * Main function.
+   *
+   * @param args either <code>-dir directory-name</code> or one or more
+   *        file names
+   * @throws Exception a GateException carrying the usage string when the
+   *         arguments are unusable; anything thrown by GATE initialisation
+   *         or by the PRs is propagated unchanged
+   */
+  public static void main(String[] args) throws Exception {
+    // say "hi"
+    Out.prln("processing command line arguments");
+
+    // check we have a directory name or list of files
+    if(args.length < 1) throw new GateException(usage);
+    List<File> inputFiles;
+    String runTitle;
+    if(args[0].equals("-dir")) { // list all the files in the dir
+      if(args.length < 2) throw new GateException(usage);
+      File dir = new File(args[1]);
+      File[] filesArray = dir.listFiles();
+      if(filesArray == null)
+        throw new GateException(
+          dir.getPath() + " is not a directory; " + usage
+        );
+      inputFiles = Arrays.asList(filesArray);
+      runTitle = "Measuring performance on directory " + args[1];
+    } else { // all args should be file names
+      inputFiles = new ArrayList<File>();
+      for(String fileName : args)
+        inputFiles.add(new File(fileName));
+      // no directory in this mode, and args[1] is absent for a single file,
+      // so the run title must not be built from args[1]
+      runTitle = "Measuring performance on " + inputFiles.size() + " file(s)";
+    }
+
+    prof.initRun(runTitle);
+
+    // initialise GATE
+    prof.checkPoint("Before GATE.init()");
+    Gate.init();
+
+    // create some processing resources, each with an empty parameter map
+    prof.checkPoint("Before creating the processing resources");
+
+    //create a default tokeniser
+    DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
+      "gate.creole.tokeniser.DefaultTokeniser", Factory.newFeatureMap());
+    prof.checkPoint("Tokeniser initialised");
+
+    //create a default gazetteer
+    DefaultGazetteer gaz = (DefaultGazetteer) Factory.createResource(
+      "gate.creole.gazetteer.DefaultGazetteer", Factory.newFeatureMap());
+    prof.checkPoint("Gazetteer initialised");
+
+    //create a splitter
+    SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
+      "gate.creole.splitter.SentenceSplitter", Factory.newFeatureMap());
+    prof.checkPoint("Sentence splitter initialised");
+
+    //create a tagger
+    POSTagger tagger = (POSTagger) Factory.createResource(
+      "gate.creole.POSTagger", Factory.newFeatureMap());
+    prof.checkPoint("POSTagger initialised");
+
+    //create a grammar
+    ANNIETransducer transducer = (ANNIETransducer) Factory.createResource(
+      "gate.creole.ANNIETransducer", Factory.newFeatureMap());
+    prof.checkPoint("Grammars initialised");
+
+    //create an orthomatcher
+    OrthoMatcher orthomatcher = (OrthoMatcher) Factory.createResource(
+      "gate.creole.orthomatcher.OrthoMatcher", Factory.newFeatureMap());
+    prof.checkPoint("Orthomatcher initialised");
+
+    // for each document
+    //   create a gate doc
+    //   set as the document for the PRs
+    //   run the PRs, with a profiler checkpoint after each
+    //   delete the doc
+    Out.prln("\nLooping on input files list");
+    docs = inputFiles.size();
+    int fileNo = 0;
+    for(File inFile : inputFiles) {
+      fileNo++;
+
+      // set the source URL parameter to a "file:..." URL string; a fresh
+      // map per document, so no PR's creation parameters are reused
+      FeatureMap docParams = Factory.newFeatureMap();
+      docParams.put(Document.DOCUMENT_URL_PARAMETER_NAME,
+        inFile.toURI().toURL().toExternalForm());
+      docParams.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
+
+      // create the document
+      Document doc = (Document) Factory.createResource(
+        "gate.corpora.DocumentImpl", docParams);
+      long docLength = doc.getContent().size().longValue();
+      totalDocLength += docLength;
+      if(maxDocLength < docLength) maxDocLength = docLength;
+
+      // run the tokeniser
+      tokeniser.setDocument(doc);
+      prof.checkPoint("Processing file " + inFile.getPath() +
+        ", #" + fileNo + "/" + docs, new String[0], true, false, false);
+      tokeniser.execute();
+      prof.checkPoint("", new String[] {"Tokenizer", "Processing"}, false, false, false);
+
+      //run gazetteer
+      gaz.setDocument(doc);
+      gaz.execute();
+      // "Gazettier" (sic) must stay spelled like the printCategAvg key below
+      prof.checkPoint("", new String[] {"Gazettier", "Processing"}, false, false, false);
+
+      //run splitter
+      splitter.setDocument(doc);
+      splitter.execute();
+      prof.checkPoint("", new String[] {"Splitter", "Processing"}, false, false, false);
+
+      //run the tagger
+      tagger.setDocument(doc);
+      tagger.execute();
+      prof.checkPoint("", new String[] {"Tagger", "Processing"}, false, false, false);
+
+      //run the transducer
+      transducer.setDocument(doc);
+      transducer.execute();
+      prof.checkPoint("", new String[] {"JAPE grammars", "Processing"}, false, false, false);
+
+      // run the orthomatcher
+      orthomatcher.setDocument(doc);
+      orthomatcher.execute();
+      prof.checkPoint("", new String[] {"Orthomatcher", "Processing"}, false, false, false);
+
+      // make the doc a candidate for garbage collection
+      Factory.deleteResource(doc);
+    } // input files loop
+
+    prof.checkPoint("Done!");
+
+    totalDocLength = totalDocLength/1024;
+    Out.prln("\nTotal KBytes processed: " + (long)totalDocLength);
+    Out.prln("\nMax document size in bytes: " + (long)maxDocLength +
+      " (" + (long) maxDocLength/1024 + " Kb)");
+
+    prof.printCategAvg("Processing", docs, totalDocLength, "kb");
+    prof.printCategAvg("Tokenizer", docs, totalDocLength, "kb");
+    prof.printCategAvg("Gazettier", docs, totalDocLength, "kb");
+    prof.printCategAvg("Splitter", docs, totalDocLength, "kb");
+    prof.printCategAvg("Tagger", docs, totalDocLength, "kb");
+    prof.printCategAvg("JAPE grammars", docs, totalDocLength, "kb");
+    prof.printCategAvg("Orthomatcher", docs, totalDocLength, "kb");
+  } // main
+} // class ProfilePRs
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|