Revision: 8980
http://docbook.svn.sourceforge.net/docbook/?rev=8980&view=rev
Author: kasunbg
Date: 2011-03-26 09:52:27 +0000 (Sat, 26 Mar 2011)
Log Message:
-----------
Removed the WebHelpIndexer's Ant dependency. The indexer can now be run as a standalone program.
Example:
java -cp webhelpindexer.jar:lib/lucene-core-3.0.0.jar:lib/lucene-analyzers-3.0.0.jar:/usr/share/java/xercesImpl.jar com.nexwave.nquindexer.IndexerMain ../x$
discussion:
http://lists.oasis-open.org/archives/docbook-apps/201102/msg00079.html
Modified Paths:
--------------
trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java
trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java
trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java
Added Paths:
-----------
trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java
Added: trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java
===================================================================
--- trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java (rev 0)
+++ trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java 2011-03-26 09:52:27 UTC (rev 8980)
@@ -0,0 +1,404 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.nexwave.nquindexer;
+
+import com.nexwave.nsidita.DirList;
+import com.nexwave.nsidita.DocFileInfo;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com
+ * Date: Feb 10, 2011
+ */
+
+public class IndexerMain {
+
+ // messages
+ private String txt_no_inputdir = "Input directory not found:";
+ private String txt_cannot_create_outputdir = "Cannot create output search directory.";
+ private String txt_no_files_found = "No html files found.";
+ private String txt_wrong_dita_basedir = "ERROR: Parser initialization failed. Wrong dita base dir";
+ private String txt_no_relative_files_found = "No relative html files calculated.";
+ private String txt_no_words_gathered = "No words have been indexed in";
+ private String txt_no_html_files = "No HTML Files found in";
+ private String txt_no_args = "No argument given: you must provide an htmlDir to the IndexerMain";
+
+ private static String txt_no_lang_specified ="Language of the content is not specified. Defaults to English.";
+
+ //working directories
+ private String searchdir = "search";
+ private File inputDir = null;
+ private String outputDir = null;
+ private String projectDir = null;
+
+ // ANT parameters
+ public String htmlDir = null;
+ public String indexerLanguage = "en";
+
+ //supported languages: add new entries to this list. Don't append country codes such as en_US or en_GB,
+ // as the stemmers don't distinguish between them.
+ private String[] supportedLanguages = {"en", "de", "fr", "zh", "ja", "ko"}; //currently extended support available for
+ // English, German, French and CJK (Chinese [zh], Japanese [ja], Korean [ko]) languages only.
+
+ // Indexing features: words to remove
+ private ArrayList<String> cleanUpStrings = null;
+ private ArrayList<String> cleanUpChars = null;
+
+ //Html extension
+ private String htmlExtension = "html";
+
+ // Constructors
+ public IndexerMain(String htmlDir, String indexerLanguage) {
+ super();
+ setHtmlDir(htmlDir);
+ setIndexerLanguage(indexerLanguage);
+ }
+
+ /**
+ * The content language defaults to English "en"
+ * @param htmlDir The directory where the html files reside.
+ */
+ public IndexerMain(String htmlDir) {
+ super();
+ setHtmlDir(htmlDir);
+ setIndexerLanguage("en");
+ }
+
+ /**
+ * The setter for the "htmlDir" attribute (parameter of the task)
+ *
+ * @param htmlDir
+ */
+ public void setHtmlDir(String htmlDir) {
+ this.htmlDir = htmlDir;
+ }
+
+ /**
+ * Set the extension in which html files are generated
+ *
+ * @param htmlExtension The extension in which html files are generated
+ */
+ public void setHtmlextension(String htmlExtension) {
+ this.htmlExtension = htmlExtension;
+ //Trim the starting "."
+ if (this.htmlExtension.startsWith(".")) {
+ this.htmlExtension = this.htmlExtension.substring(1);
+ }
+ }
+
+ /**
+ * setter for "indexerLanguage" attribute from ANT
+ *
+ * @param indexerLanguage language for the search indexer. Used to differentiate which stemmer to be used.
+ */
+ public void setIndexerLanguage(String indexerLanguage) {
+ if (indexerLanguage != null && !"".equals(indexerLanguage)) {
+ int temp = indexerLanguage.indexOf('_');
+ if (temp != -1) {
+ indexerLanguage = indexerLanguage.substring(0, temp);
+ }
+ int i = 0;
+ for (; i < supportedLanguages.length; i++) {
+ if (indexerLanguage.equals(supportedLanguages[i])) {
+ this.indexerLanguage = supportedLanguages[i];
+ break;
+ }
+ }
+
+ //if not in supported language list,
+ if (i >= supportedLanguages.length) {
+// System.out.println("The given language, \""+indexerLanguage+"\", does not have extensive support for " +
+// "searching. Check documentation for details. ");
+ this.indexerLanguage = indexerLanguage;
+ }
+ } else {
+ this.indexerLanguage = "@@"; //fail-safe mechanism, This vm should not reach this point.
+ }
+ }
+
+ /**
+ * com.nexwave.nquindexer.IndexerMain
+ * The main class without Ant dependencies.
+ * This can be used as a standalone jar.
+ *
+ * @param args command-line arguments: htmlDirectory, optionally followed by indexerLanguage.
+ * If only one argument is given (htmlDirectory), indexerLanguage defaults to English.
+ */
+ public static void main(String[] args) {
+
+ IndexerMain indexer;
+ if (args.length == 1) {
+ System.out.println(txt_no_lang_specified);
+ indexer = new IndexerMain(args[0]);
+ } else if (args.length >= 2) {
+
+ indexer = new IndexerMain(args[0], args[1]);
+ } else {
+ throw new ArrayIndexOutOfBoundsException("Please specify the parameters htmlDirectory and (optional) " +
+ "indexerLanguage");
+ }
+
+ indexer.execute();
+
+ }
+
+
+ /**
+ * Implementation of the execute function (Task interface)
+ */
+ public void execute() {
+ try {
+ //Use Xerces as the parser. Does not support Saxon6.5.5 parser
+ System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
+ System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl");
+// System.setProperty("org.xml.sax.driver", "com.icl.saxon.aelfred.SAXDriver");
+// System.setProperty("javax.xml.parsers.SAXParserFactory", "com.icl.saxon.aelfred.SAXParserFactoryImpl");
+ } catch (SecurityException se) {
+ System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
+ "is not in your CLASSPATH.");
+ } catch (Exception e) {
+ System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
+ "is not in your CLASSPATH");
+ }
+
+ ArrayList<DocFileInfo> filesDescription = null; // list of information about the topic files
+ ArrayList<File> htmlFiles = null; // topic files listed in the given directory
+ ArrayList<String> htmlFilesPathRel = null;
+ Map<String, String> tempDico = new HashMap<String, String>();
+ Iterator it;
+
+ //File name initialization
+ String htmlList = "htmlFileList.js";
+ String htmlInfoList = "htmlFileInfoList.js";
+ String indexName = ".js";
+
+ //timing
+ Date dateStart = new Date();
+
+ if (htmlDir == null) {
+ System.out.println(txt_no_args + ".");
+ return;
+ }
+ // Init input directory
+ inputDir = new File(htmlDir);
+
+ // Begin of init
+ // check if inputdir initialized
+ if (inputDir == null) {
+ DisplayHelp();
+ return;
+ }
+
+ // check if inputdir exists
+ if (!inputDir.exists()) {
+ System.out.println(txt_no_inputdir + " " + inputDir + ".");
+ return;
+ }
+
+ // check if outputdir defined
+ if (outputDir == null) {
+ //set the output directory: path= {inputDir}/search
+ outputDir = inputDir.getPath().concat(File.separator).concat(searchdir);
+ }
+
+ // check if outputdir exists
+ File tempfile = new File(outputDir);
+ if (!tempfile.exists()) {
+ boolean b = (new File(outputDir)).mkdir();
+ if (!b) {
+ System.out.println(txt_cannot_create_outputdir + " " + outputDir + ".");
+ return;
+ }
+ }
+
+ // check if projdir is defined
+ if (projectDir == null) {
+ projectDir = inputDir.getPath();
+ }
+ //end of init
+
+
+ // Get the list of all html files but the tocs, covers and indexes
+ DirList nsiDoc = new DirList(inputDir, "^.*\\." + htmlExtension + "?$", 1);
+ htmlFiles = nsiDoc.getListFiles();
+ // Check if found html files
+ if (htmlFiles.isEmpty()) {
+ System.out.println(txt_no_html_files + " " + inputDir + ".");
+ return;
+ }
+ // Get the list of all html files with relative paths
+ htmlFilesPathRel = nsiDoc.getListFilesRelTo(projectDir);
+
+ if (htmlFiles == null) {
+ System.out.println(txt_no_files_found);
+ return;
+ } else if (htmlFilesPathRel == null) {
+ System.out.println(txt_no_relative_files_found);
+ return;
+ }
+
+ // Create the list of the existing html files (index starts at 0)
+ WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel);
+
+ // Parse each html file to retrieve the words:
+ // ------------------------------------------
+
+ // Retrieve the clean-up properties for indexing
+ RetrieveCleanUpProps();
+ // System.out.print("clean"+" " +cleanUpStrings);
+
+ //create a default handler
+ //SaxHTMLIndex spe = new SaxHTMLIndex (); // do not use clean-up props files
+ //SaxHTMLIndex spe = new SaxHTMLIndex (cleanUpStrings); // use clean-up props files
+ SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files
+
+ if (spe.init(tempDico) == 0) {
+
+ //create a html file description list
+ filesDescription = new ArrayList<DocFileInfo>();
+
+ it = htmlFiles.iterator();
+
+ // parse each html files
+ while (it.hasNext()) {
+ File ftemp = (File) it.next();
+ //tempMap.put(key, value);
+ //The HTML file information are added in the list of FileInfoObject
+ DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, this.indexerLanguage));
+
+ ftemp = docFileInfoTemp.getFullpath();
+ String stemp = ftemp.toString();
+ int i = stemp.indexOf(projectDir);
+ if (i != 0) {
+ System.out.println("the documentation root does not match with the documentation input!");
+ return;
+ }
+ int ad = 1;
+ if (stemp.equals(projectDir)) ad = 0;
+ stemp = stemp.substring(i + projectDir.length() + ad); //i is redundant (i==0 always)
+ ftemp = new File(stemp);
+ docFileInfoTemp.setFullpath(ftemp);
+
+ filesDescription.add(docFileInfoTemp);
+ }
+ /*remove empty strings from the map*/
+ if (tempDico.containsKey("")) {
+ tempDico.remove("");
+ }
+ // write the index files
+ if (tempDico.isEmpty()) {
+ System.out.println(txt_no_words_gathered + " " + inputDir + ".");
+ return;
+ }
+
+// WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico);
+ WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico, indexerLanguage);
+
+ // write the html list file with title and shortdesc
+ //create the list of the existing html files (index starts at 0)
+ WriteJSFiles.WriteHTMLInfoList(outputDir.concat(File.separator).concat(htmlInfoList), filesDescription);
+
+ //perf measurement
+ Date dateEnd = new Date();
+ long diff = dateEnd.getTime() - dateStart.getTime();
+ if (diff < 1000)
+ System.out.println("Delay = " + diff + " milliseconds");
+ else
+ System.out.println("Delay = " + diff / 1000 + " seconds");
+ } else {
+ System.out.println(txt_wrong_dita_basedir);
+ return;
+ }
+ }
+
+ /**
+ * Prints the usage information for this class to <code>System.out</code>.
+ */
+ private static void DisplayHelp() {
+ String lSep = System.getProperty("line.separator");
+ StringBuffer msg = new StringBuffer();
+ msg.append("USAGE:" + lSep);
+ msg.append(" java -classpath TesterIndexer inputDir outputDir projectDir" + lSep);
+ msg.append("with:" + lSep);
+ msg.append(" inputDir (mandatory) : specify the html files ' directory to index" + lSep);
+ msg.append(" outputDir (optional) : specify where to output the index files" + lSep);
+ msg.append(" projectDir (optional) : specify the root of the documentation directory" + lSep);
+ msg.append("Example:" + lSep);
+ msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc" + lSep);
+ msg.append("Example 2:" + lSep);
+ msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc/customer/concepts /home/$USER/temp/search /home/$USER/DITA/doc/" + lSep);
+ System.out.println(msg.toString());
+ }
+
+ private int RetrieveCleanUpProps() {
+
+ // Files for punctuation (only one for now)
+ String[] punctuationFiles = new String[]{"punctuation.props"};
+ FileInputStream input;
+ String tempStr;
+ File ftemp;
+ Collection c = new ArrayList<String>();
+
+ // Get the list of the props file containing the words to remove (not the punctuation)
+ DirList props = new DirList(inputDir, "^(?!(punctuation)).*\\.props$", 1);
+ ArrayList<File> wordsList = props.getListFiles();
+// System.out.println("props files:"+wordsList);
+ //TODO all properties are merged into a single ArrayList. Is that OK?
+ Properties enProps = new Properties();
+ String propsDir = inputDir.getPath().concat(File.separator).concat(searchdir);
+
+ // Init the lists which will contain the words and chars to remove
+ cleanUpStrings = new ArrayList<String>();
+ cleanUpChars = new ArrayList<String>();
+
+ try {
+ // Retrieve words to remove
+ for (File aWordsList : wordsList) {
+ ftemp = aWordsList;
+ if (ftemp.exists()) {
+ enProps.load(input = new FileInputStream(ftemp.getAbsolutePath()));
+ input.close();
+ c = enProps.values();
+ cleanUpStrings.addAll(c);
+ enProps.clear();
+ }
+ }
+
+ // Retrieve char to remove (punctuation for ex.)
+ for (String punctuationFile : punctuationFiles) {
+ tempStr = propsDir.concat(File.separator).concat(punctuationFile);
+ ftemp = new File(tempStr);
+ if (ftemp.exists()) {
+ enProps.load(input = new FileInputStream(tempStr));
+ input.close();
+ c = enProps.values();
+ cleanUpChars.addAll(c);
+ enProps.clear();
+ }
+ }
+ }
+ catch (IOException e) {
+ e.printStackTrace();
+ return 1;
+ }
+ return 0;
+ }
+
+}
Modified: trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java
===================================================================
--- trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java 2011-03-21 17:55:22 UTC (rev 8979)
+++ trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java 2011-03-26 09:52:27 UTC (rev 8980)
@@ -1,3 +1,4 @@
+/*
package com.nexwave.nquindexer;
import java.io.File;
@@ -18,6 +19,7 @@
import com.nexwave.nsidita.DirList;
import com.nexwave.nsidita.DocFileInfo;
+*/
/**
* Indexer ant task.
*
@@ -25,7 +27,8 @@
*
* @author N. Quaine
* @author Kasun Gajasinghe <http://kasunbg.blogspot.com>
- */
+ *//*
+
public class IndexerTask extends Task {
// messages
@@ -36,7 +39,7 @@
private String txt_no_relative_files_found= "No relative html files calculated.";
private String txt_no_words_gathered= "No words have been indexed in";
private String txt_no_html_files="No HTML Files found in";
- private String txt_no_args="No argument given: you must provide an htmldir to the IndexerTask";
+ private String txt_no_args="No argument given: you must provide an htmlDir to the IndexerTask";
//working directories
private String searchdir = "search";
@@ -45,7 +48,7 @@
private String projectDir = null;
// ANT parameters
- private String htmldir=null;
+ private String htmlDir=null;
public static String indexerLanguage="en";
//supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK,
@@ -64,18 +67,22 @@
public IndexerTask() {
super();
}
- /** The setter for the "htmldir" attribute (parameter of the task)
+ */
+/** The setter for the "htmlDir" attribute (parameter of the task)
* @param htmldir
* @throws InterruptedException
- */
- public void setHtmldir(String htmldir) {
- this.htmldir = htmldir;
+ *//*
+
+ public void setHtmlDir(String htmlDir) {
+ this.htmlDir = htmlDir;
}
- /**
+ */
+/**
* Set the extension in which html files are generated
* @param htmlExtension The extension in wich html files are generated
- */
+ *//*
+
public void setHtmlextension(String htmlExtension) {
this.htmlExtension = htmlExtension;
//Trim the starting "."
@@ -84,11 +91,13 @@
}
}
- /**
+ */
+/**
* setter for "indexerLanguage" attribute from ANT
* @param indexerLanguage language for the search indexer. Used to differerentiate which stemmer to be used.
* @throws InterruptedException for ant
- */
+ *//*
+
public void setIndexerLanguage(String indexerLanguage){
if(indexerLanguage !=null && !"".equals(indexerLanguage)) {
int temp = indexerLanguage.indexOf('_');
@@ -114,9 +123,11 @@
}
}
- /**
+ */
+/**
* Implementation of the execute function (Task interface)
- */
+ *//*
+
public void execute() throws BuildException {
try{
//Use Xerces as the parser. Does not support Saxon6.5.5 parser
@@ -146,12 +157,12 @@
//timing
Date dateStart = new Date();
- if (htmldir == null) {
+ if (htmlDir == null) {
System.out.println(txt_no_args + ".");
return;
}
// Init input directory
- inputDir = new File(htmldir);
+ inputDir = new File(htmlDir);
// Begin of init
// check if inputdir initialized
@@ -252,7 +263,9 @@
filesDescription.add(docFileInfoTemp);
}
- /*remove empty strings from the map*/
+ */
+/*remove empty strings from the map*//*
+
if (tempDico.containsKey("")) {
tempDico.remove("");
}
@@ -281,9 +294,11 @@
}
}
- /**
+ */
+/**
* Prints the usage information for this class to <code>System.out</code>.
- */
+ *//*
+
private static void DisplayHelp() {
String lSep = System.getProperty("line.separator");
StringBuffer msg = new StringBuffer();
@@ -354,3 +369,4 @@
}
}
+*/
Modified: trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java
===================================================================
--- trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java 2011-03-21 17:55:22 UTC (rev 8979)
+++ trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java 2011-03-26 09:52:27 UTC (rev 8980)
@@ -1,5 +1,7 @@
+/*
package com.nexwave.nquindexer;
+*/
/**
* For running tests with the indexertask.
*
@@ -7,17 +9,20 @@
*
* @author N. Quaine
* @author Kasun Gajasinghe
- */
+ *//*
+
public class TesterIndexer {
public static IndexerTask IT = null;
- /**
+ */
+/**
* @param args
* @throws InterruptedException
- */
+ *//*
+
public static void main(String[] args) throws InterruptedException {
if (args.length != 0) {
IT = new IndexerTask();
- IT.setHtmldir(args[0]);
+ IT.setHtmlDir(args[0]);
IT.setIndexerLanguage(args[1]);
IT.execute();
} else {
@@ -27,7 +32,7 @@
String dir = "../doc/content";
String lang = "en";
IT = new IndexerTask();
- IT.setHtmldir(dir);
+ IT.setHtmlDir(dir);
IT.setIndexerLanguage(lang);
IT.execute();
}
@@ -36,3 +41,4 @@
}
+*/
Modified: trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java
===================================================================
--- trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java 2011-03-21 17:55:22 UTC (rev 8979)
+++ trunk/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java 2011-03-26 09:52:27 UTC (rev 8980)
@@ -13,163 +13,247 @@
import java.util.TreeSet;
import com.nexwave.nsidita.DocFileInfo;
+
/**
* Outputs the js files with:
* - the list of html files and their description
* - the words retrieved from the html files and their location
- *
- * @version 2.0 2010-08-13
- *
+ *
* @author N. Quaine
* @author Kasun Gajasinghe
+ * @version 2.0 2010-08-13
*/
public class WriteJSFiles {
-
- private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";
- private static String txt_indices_location = "The created index files are located in ";
-
- /** Create a javascript array listing the html files with their paths relative to the project root
- * @param fileO path and name of the file in which to output the list of html files
- * @param list of the html files, relative to the doc root directory
- */
- public static void WriteHTMLList (String fileO,ArrayList<String> list) {
- int i = 0;
- Iterator it;
-
- if (list == null) {
- return;
- }
- if (fileO == null) {
- return;
- }
- it = list.iterator ( ) ;
-
- try {
- // open a outputstream, here a file
- OutputStream fOut= new FileOutputStream(fileO);
- OutputStream bout= new BufferedOutputStream(fOut);
- OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
-
- /*fl : file list*/
- out.write("//List of files which are indexed.\n");
- out.write("fl = new Array();\n");
- String temp;
- while ( it.hasNext ( ) ) {
- temp = (String)it.next();
- //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));
- out.write("fl[\""+i+"\"]"+"= \""+temp.replace(File.separatorChar, '/')+"\";\n");
- i++;
- }
-
- out.flush(); // Don't forget to flush!
- out.close();
+
+ private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";
+ private static String txt_indices_location = "The created index files are located in ";
+
+ /**
+ * Create a javascript array listing the html files with their paths relative to the project root
+ *
+ * @param fileO path and name of the file in which to output the list of html files
+ * @param list of the html files, relative to the doc root directory
+ */
+ public static void WriteHTMLList(String fileO, ArrayList<String> list) {
+ int i = 0;
+ Iterator it;
+
+ if (list == null) {
+ return;
+ }
+ if (fileO == null) {
+ return;
+ }
+ it = list.iterator();
+
+ try {
+ // open a outputstream, here a file
+ OutputStream fOut = new FileOutputStream(fileO);
+ OutputStream bout = new BufferedOutputStream(fOut);
+ OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
+
+ /*fl : file list*/
+ out.write("//List of files which are indexed.\n");
+ out.write("fl = new Array();\n");
+ String temp;
+ while (it.hasNext()) {
+ temp = (String) it.next();
+ //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));
+ out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n");
+ i++;
+ }
+
+ out.flush(); // Don't forget to flush!
+ out.close();
// System.out.println("the array of html is in " + fileO);
- }
- catch (UnsupportedEncodingException e) {
- System.out.println(txt_VM_encoding_not_supported);
- }
- catch (IOException e) {
- System.out.println(e.getMessage());
- }
-
- }
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
- /** Create a javascript array listing the html files with
- * their paths relative to project root, their titles and shortdescs
- * @param fileO path and name of the file in which to output the list of html files
- * @param list of the html files, relative to the doc root directory
- */
- public static void WriteHTMLInfoList (String fileO,ArrayList<DocFileInfo> list) {
- int i = 0;
- Iterator it = null;
-
- if (list == null) {
- return;
- }
- if (fileO == null) {
- return;
- }
- it = list.iterator ( ) ;
- try {
- // open a outputstream, here a file
- OutputStream fOut= new FileOutputStream(fileO);
- // open a buffer output stream
- OutputStream bout= new BufferedOutputStream(fOut);
- OutputStreamWriter out
- = new OutputStreamWriter(bout, "UTF-8");
-
- /*fil : file list*/
- out.write("fil = new Array();\n");
-
- DocFileInfo tempInfo;
- String tempPath;
- String tempTitle;
- String tempShortdesc;
- while ( it.hasNext ( ) ) {
- // Retrieve file information: path, title and shortdesc.
- tempInfo = (DocFileInfo)it.next();
- tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');
- tempTitle = tempInfo.getTitle();
- tempShortdesc = tempInfo.getShortdesc();
- //Remove unwanted white char
- if (tempTitle != null ) {
- tempTitle = tempTitle.replaceAll("\\s+", " ");
- tempTitle = tempTitle.replaceAll("['�\"]", " ");
- }
- if (tempShortdesc != null ) {
- tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");
- tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");
- }
- //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);
- out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@"+tempShortdesc+"\";\n");
- i++;
- }
-
- out.flush(); // Don't forget to flush!
- out.close();
+ }
- }
- catch (UnsupportedEncodingException e) {
- System.out.println(txt_VM_encoding_not_supported);
- }
- catch (IOException e) {
- System.out.println(e.getMessage());
- }
-
- }
+ /**
+ * Create a javascript array listing the html files with
+ * their paths relative to project root, their titles and shortdescs
+ *
+ * @param fileO path and name of the file in which to output the list of html files
+ * @param list of the html files, relative to the doc root directory
+ */
+ public static void WriteHTMLInfoList(String fileO, ArrayList<DocFileInfo> list) {
+ int i = 0;
+ Iterator it = null;
- /** Create javascript index files alphabetically.
- * @param fileOutStr contains the path and the suffix of the index files to create.
- * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...
- * @param indexMap its keys are the indexed words and
- * its values are the list of the files which contain the word.
- */
- public static void WriteIndex (String fileOutStr, Map<String, ?> indexMap) {
- OutputStreamWriter out;
- OutputStream bout;
- OutputStream fOut;
- String tstr;
-
- // check arguments
- if (indexMap == null || fileOutStr ==null) {
- return;
- }
+ if (list == null) {
+ return;
+ }
+ if (fileO == null) {
+ return;
+ }
+ it = list.iterator();
+ try {
+ // open a outputstream, here a file
+ OutputStream fOut = new FileOutputStream(fileO);
+ // open a buffer output stream
+ OutputStream bout = new BufferedOutputStream(fOut);
+ OutputStreamWriter out
+ = new OutputStreamWriter(bout, "UTF-8");
- // Collect the key of the index map
- TreeSet<String> sortedKeys = new TreeSet<String>();
- sortedKeys.addAll(indexMap.keySet());
- Iterator keyIt = sortedKeys.iterator();
- tstr = (String)keyIt.next();
-
- File fileOut= new File(fileOutStr);
+ /*fil : file list*/
+ out.write("fil = new Array();\n");
+ DocFileInfo tempInfo;
+ String tempPath;
+ String tempTitle;
+ String tempShortdesc;
+ while (it.hasNext()) {
+ // Retrieve file information: path, title and shortdesc.
+ tempInfo = (DocFileInfo) it.next();
+ tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');
+ tempTitle = tempInfo.getTitle();
+ tempShortdesc = tempInfo.getShortdesc();
+ //Remove unwanted white char
+ if (tempTitle != null) {
+ tempTitle = tempTitle.replaceAll("\\s+", " ");
+ tempTitle = tempTitle.replaceAll("['�\"]", " ");
+ }
+ if (tempShortdesc != null) {
+ tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");
+ tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");
+ }
+ //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);
+ out.write("fil[\"" + i + "\"]" + "= \"" + tempPath + "@@@" + tempTitle + "@@@" + tempShortdesc + "\";\n");
+ i++;
+ }
+
+ out.flush(); // Don't forget to flush!
+ out.close();
+
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
+
+ }
+
+ /**
+ * Create javascript index files alphabetically.
+ *
+ * @param fileOutStr contains the path and the suffix of the index files to create.
+ * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...
+ * @param indexMap its keys are the indexed words and
+ * its values are the list of the files which contain the word.
+ * @param indexerLanguage The language of the content that gets indexed
+ */
+ public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) {
+ OutputStreamWriter out;
+ OutputStream bout;
+ OutputStream fOut;
+ String tstr;
+
+ // check arguments
+ if (indexMap == null || fileOutStr == null) {
+ return;
+ }
+
+ // Collect the key of the index map
+ TreeSet<String> sortedKeys = new TreeSet<String>();
+ sortedKeys.addAll(indexMap.keySet());
+ Iterator keyIt = sortedKeys.iterator();
+ tstr = (String) keyIt.next();
+
+ File fileOut = new File(fileOutStr);
+
/* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js
+ * Index will be distributed evenly in these three files.
+ * tstr is the current key
+ * keyIt is the iterator of the key set
+ * */
+ int indexSize = sortedKeys.size();
+ for (int i = 1; i <= 3; i++) {
+ try {
+ // open a outputstream, here a file
+ fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());
+ bout = new BufferedOutputStream(fOut);
+ out = new OutputStreamWriter(bout, "UTF-8");
+
+ try {
+ /* Populate a javascript hashmap:
+ The key is a word to look for in the index,
+ The value is the numbers of the files in which the word exists.
+ Example: w["key"]="file1,file2,file3";*/
+ int count = 0;
+ if (i == 1)
+ out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");
+ out.write("//Auto generated index for searching.\n");
+ while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0))
+ out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
+ tstr = (String) keyIt.next();
+ count++;
+ if (indexSize / count < 3) {
+ break;
+ }
+ }
+ out.write("\n");
+ out.flush(); // Don't forget to flush!
+ out.close();
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
+ }
+ System.out.println(txt_indices_location + fileOutStr);
+ }
+
+
+ /**
+ * Create javascript index files alphabetically.
+ *
+ * @deprecated replaced by {@link #WriteIndex(String, Map, String)}
+ *
+ * @param fileOutStr contains the path and the suffix of the index files to create.
+ * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...
+ * @param indexMap its keys are the indexed words and
+ * its values are the list of the files which contain the word.
+ */
+
+
+ public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap) {
+ OutputStreamWriter out;
+ OutputStream bout;
+ OutputStream fOut;
+ String tstr;
+
+ // check arguments
+ if (indexMap == null || fileOutStr == null) {
+ return;
+ }
+
+ // Collect the key of the index map
+ TreeSet<String> sortedKeys = new TreeSet<String>();
+ sortedKeys.addAll(indexMap.keySet());
+ Iterator keyIt = sortedKeys.iterator();
+ tstr = (String) keyIt.next();
+
+ File fileOut = new File(fileOutStr);
+
+ /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js
* Index will be distributed evenly in these three files.
* tstr is the current key
* keyIt is the iterator of the key set
* */
- int indexSize = sortedKeys.size();
+ int indexSize = sortedKeys.size();
for (int i = 1; i <= 3; i++) {
try {
// open a outputstream, here a file
@@ -183,17 +267,17 @@
The value is the numbers of the files in which the word exists.
Example: w["key"]="file1,file2,file3";*/
int count = 0;
- if(i==1)
- out.write("var indexerLanguage=\""+IndexerTask.indexerLanguage+"\";\n");
+// if (i == 1)
+// out.write("var indexerLanguage=\"" + IndexerTask.indexerLanguage + "\";\n");
out.write("//Auto generated index for searching.\n");
while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0))
out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
tstr = (String) keyIt.next();
count++;
- if (indexSize / count < 3){
+ if (indexSize / count < 3) {
break;
}
- }
+ }
out.write("\n");
out.flush(); // Don't forget to flush!
out.close();
@@ -205,7 +289,7 @@
catch (IOException e) {
System.out.println(e.getMessage());
}
- }
- System.out.println(txt_indices_location + fileOutStr);
- }
+ }
+ System.out.println(txt_indices_location + fileOutStr);
+ }
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|