From: Santi B. <san...@us...> - 2007-12-13 00:02:01
|
Update of /cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/worker In directory sc8-pr-cvs3.sourceforge.net:/tmp/cvs-serv1821/modules/scanner/src/com/babeldoc/scanner/worker Added Files: VFSScanner.java Log Message: Added VFS scanner. Preliminar version. --- NEW FILE: VFSScanner.java --- /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2000 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact ap...@ap.... * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. * * Portions of this software are based upon public domain software * originally written at the National Center for Supercomputing Applications, * University of Illinois, Urbana-Champaign. 
* ====================================================================
 *
 * Babeldoc: The Universal Document Processor
 *
 * $Header: /cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/worker/VFSScanner.java,v 1.1 2007/12/13 00:02:02 santibegue Exp $
 * $DateTime$
 * $Author: santibegue $
 *
 */
package com.babeldoc.scanner.worker;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSelectInfo;
import org.apache.commons.vfs.FileSystemException;
import org.apache.commons.vfs.FileSystemManager;
import org.apache.commons.vfs.FileSystemOptions;
import org.apache.commons.vfs.FileType;
import org.apache.commons.vfs.VFS;

import com.babeldoc.core.I18n;
import com.babeldoc.core.LogService;
import com.babeldoc.core.NameValuePair;
import com.babeldoc.core.option.ConfigOption;
import com.babeldoc.core.option.IConfigOptionType;
import com.babeldoc.core.pipeline.PipelineDocument;
import com.babeldoc.scanner.ScannerConfigurationException;
import com.babeldoc.scanner.ScannerException;
import com.babeldoc.scanner.ScannerWorker;
import com.babeldoc.scanner.ScannerWorkerInfo;

/**
 * VFSScanner - scanner worker that scans a Commons VFS location and enqueues
 * the content of every accepted file as a document.
 *
 * <p>After a file has been enqueued it is either moved below the configured
 * done directory or, when no done directory is configured, deleted.</p>
 *
 * @author Begue, Santiago
 * @version $Revision: 1.1 $
 */
public class VFSScanner extends ScannerWorker {
  /** Configuration option: URI of the folder to scan. */
  public static final String IN_DIR = "inDirectory";

  /** Configuration option: URI of the folder that receives processed files. */
  public static final String DONE_DIR = "doneDirectory";

  /** Configuration option name; not read by this class. */
  public static final String BUFFER_LEN = "bufferLen";

  /** Configuration option: whether sub folders are scanned as well. */
  public static final String INCLUDE_SUB_DIRS = "includeSubfolders";

  /** Configuration option: filename filter. */
  public static final String FILTER_FILENAME = "filter";

  /** Configuration option: minimum age of a file in ms before it is read. */
  public static final String MINIMUM_FILE_AGE = "minimumFileAge";

  /**
   * This is used to provide information to the pipeline stages about where
   * documents processed by this scanner are moved.
   */
  public static final String DONE_DIR_KEY = "done_dir";

  /** Size in bytes of the buffer used while reading a file into memory. */
  private static final int COPY_BUFFER_SIZE = 1024;

  private LogService log = LogService.getInstance(this.getClass().getName());

  /** VFS entry point, obtained in {@link #initialize()}. */
  private FileSystemManager fsManager;

  /** Created in {@link #initialize()}; not yet passed to any resolve call. */
  private FileSystemOptions opts;

  /** Folder to scan */
  private FileObject inDirectory;

  /** Folder that receives processed files; null means "delete after use" */
  private FileObject doneDirectory;

  /** flag to include sub directories */
  private boolean includeSubDirs = false;

  /**
   * Minimum time in ms since file was last modified. Attempts to guard
   * against incomplete reads when the writer of the file is "slow".
   */
  private int minimumFileAge = 0;

  /**
   * Creates the worker with a {@link VFSScannerInfo} describing its options.
   */
  public VFSScanner() {
    super(new VFSScannerInfo());
  }

  /**
   * Scans the input folder for new documents. Documents are enqueued by this
   * method itself; nothing is returned.
   *
   * @throws ScannerException
   *             if the input folder cannot be listed
   */
  public void doScan() throws ScannerException {
    try {
      processVFS(getInDirectory());
    } catch (FileSystemException e) {
      // Propagate instead of printing the stack trace so the failure is
      // reported through the exception this method declares.
      throw new ScannerException("Error scanning " + getInDirectory(), e);
    }
  }

  /**
   * Reads the configuration options held by the worker info object and
   * resolves the input and done folders.
   *
   * @throws ScannerConfigurationException
   *             if a configured location is not a folder or cannot be
   *             resolved
   */
  public void initialize() throws ScannerConfigurationException {
    try {
      fsManager = VFS.getManager();
      opts = new FileSystemOptions();

      setInDirectory(this.getInfo().getStrValue(IN_DIR));

      if (getInDirectory().getType() != FileType.FOLDER) {
        throw new ScannerConfigurationException(I18n.get(
            "scanner.VFSScanner.error.notDir", IN_DIR, getInDirectory()),
            null);
      }

      String configuredDoneDir = this.getInfo().getStrValue(DONE_DIR);

      if (configuredDoneDir != null && !"".equals(configuredDoneDir)) {
        setDoneDirectory(configuredDoneDir);

        // Check that the done directory actually is a directory
        if (doneDirectory.getType() != FileType.FOLDER) {
          throw new ScannerConfigurationException(I18n.get(
              "scanner.VFSScanner.error.notDir", DONE_DIR,
              getDoneDirectory()), null);
        }
      } else {
        getLog().logInfo(
            "Done directory not specified. All processed file will be deleted!!!");
        setDoneDirectory(null);
      }

      setIncludeSubDirs("true".equals(this.getInfo().getStrValue(
          INCLUDE_SUB_DIRS)));

      setMinimumFileAge(this.getInfo().getIntValue(MINIMUM_FILE_AGE));

      if (getMinimumFileAge() > 0) {
        LogService.getInstance().logInfo(
            "Minimum file age: " + getMinimumFileAge() + " ms");
      }

      // Add filename filter if exist
      addFilter(FILTER_FILENAME);
    } catch (FileSystemException e) {
      // A worker whose folders could not be resolved must not look
      // initialized. NOTE(review): assumes the second constructor argument
      // of ScannerConfigurationException is the cause - confirm.
      throw new ScannerConfigurationException(
          "Unable to initialize VFS scanner: " + e.getMessage(), e);
    }
  }

  /**
   * release the held resource. Do nothing - no held resources.
   */
  public void relinquishResources() {
  }

  /**
   * Moves a processed file below the done directory, keeping its path
   * relative to the input directory, or deletes it when no done directory is
   * set. An existing file at the destination is replaced.
   *
   * @param file
   *            the processed file; expected to live below the input directory
   *
   * @throws Exception
   *             declared for compatibility; failures are reported as
   *             {@link ScannerException}
   */
  protected void moveFile(FileObject file) throws Exception {
    try {
      if (doneDirectory == null) {
        file.delete();
        return;
      }

      // Mirror the file's position below the input directory so files from
      // different sub folders cannot overwrite each other.
      String relativePath = getInDirectory().getName().getRelativeName(
          file.getName());
      FileObject outfile = doneDirectory.resolveFile(relativePath);

      // create out dir if not exist
      FileObject outDir = outfile.getParent();
      if (outDir != null && !outDir.exists()) {
        outDir.createFolder();
      }

      // delete file if it already exists
      if (outfile.exists()) {
        outfile.delete();
      }

      file.moveTo(outfile);
    } catch (Exception e) {
      throw new ScannerException(I18n
          .get("scanner.VFSScanner.error.finishing"), e);
    }
  }

  /**
   * Processes the children of a folder: accepted files are enqueued and then
   * moved; sub folders are descended into only when sub folder scanning is
   * enabled. A failure on one file is logged and does not stop the scan.
   *
   * @param dir
   *            the folder whose children are processed
   * @throws FileSystemException
   *             if the folder cannot be listed
   */
  private void processVFS(FileObject dir) throws FileSystemException {
    FileObject[] files = dir.getChildren();

    for (int i = 0; i < files.length; i++) {
      FileObject child = files[i];

      if (child.getType() == FileType.FOLDER) {
        if (isIncludeSubDirs()) {
          processVFS(child);
        }
      } else {
        // create scan document and queue it!
        try {
          if (acceptFile(child)) {
            processFile(child);
            moveFile(child);
          }
        } catch (Exception ex) {
          log.logError("Error getting document from file " + child, ex);
        }
      }
    }
  }

  /**
   * Process a single file. This involves loading the file content into
   * memory and adding it to the queue together with its attributes.
   *
   * @param file
   *            the file to read
   *
   * @throws Exception
   *             if the file cannot be read or enqueued
   */
  private void processFile(FileObject file) throws Exception {
    InputStream in = null;
    // In-memory sink; the scanned file itself must never be opened for
    // writing while it is being read.
    ByteArrayOutputStream content = new ByteArrayOutputStream();

    try {
      byte[] buffer = new byte[COPY_BUFFER_SIZE];
      long modified = file.getContent().getLastModifiedTime();
      in = file.getContent().getInputStream();

      int bytesRead;
      while ((bytesRead = in.read(buffer)) != -1) {
        content.write(buffer, 0, bytesRead);
      }

      this.enqueue(content.toByteArray(), new NameValuePair[] {
          new NameValuePair(SCAN_MIMETYPE_KEY, PipelineDocument
              .getMimeTypeForFile(file.getName().getBaseName())),
          new NameValuePair(SCAN_DATE_KEY, Long.toString(modified)),
          new NameValuePair(SCAN_PATH_KEY, file.getName().getPath()),
          new NameValuePair(FILE_NAME_KEY, file.getName().getBaseName()),
          new NameValuePair(DONE_DIR_KEY, getDoneDirectory()) });
    } finally {
      // The stream is null when opening it failed; closing it blindly would
      // replace the original exception with a NullPointerException.
      if (in != null) {
        in.close();
      }
    }
  }

  /**
   * Consult configuration if this file should be processed or not. Current
   * configurable constraints include the age of the file and a filename
   * filter.
   *
   * @param file
   *            The file to be checked against configuration
   * @return true If the file should be processed at this time
   * @throws FileSystemException
   *             if the last-modified time cannot be read
   */
  private boolean acceptFile(FileObject file) throws FileSystemException {
    // Check name filter first, and then age.
    if (!acceptEntry(FILTER_FILENAME, file.getName().toString())) {
      return false;
    }

    // No age constraint configured: do not touch the modification time.
    if (getMinimumFileAge() <= 0) {
      return true;
    }

    long age = getFileAge(file);

    if (age >= getMinimumFileAge()) {
      return true;
    }

    if (log.isDebugEnabled()) {
      log.logDebug("Ignoring " + file.getName().getPathDecoded()
          + " (age " + age + " < " + getMinimumFileAge() + ")");
    }

    // Potentially add additional checks here.
    return false;
  }

  /**
   * Get age of file in ms.
   *
   * @param file
   *            The file to get the age of.
   * @return long The age of the file in ms.
   * @throws FileSystemException
   *             if the last-modified time cannot be read
   */
  private long getFileAge(FileObject file) throws FileSystemException {
    return System.currentTimeMillis()
        - file.getContent().getLastModifiedTime();
  }

  /**
   * @return the resolved folder that is scanned
   */
  public FileObject getInDirectory() {
    return inDirectory;
  }

  /**
   * Resolves and stores the folder to scan.
   *
   * @param inDirectory
   *            URI of the folder to scan
   * @throws FileSystemException
   *             if the URI cannot be resolved
   */
  public void setInDirectory(String inDirectory) throws FileSystemException {
    this.inDirectory = fsManager.resolveFile(inDirectory);
  }

  /**
   * @return the URL of the done directory, or null when none is set
   * @throws FileSystemException
   *             if the URL cannot be obtained
   */
  public String getDoneDirectory() throws FileSystemException {
    if (doneDirectory == null) {
      return null;
    }

    return doneDirectory.getURL().toString();
  }

  /**
   * Resolves and stores the done directory.
   *
   * @param doneDirectory
   *            URI of the done directory; null or empty clears it, which
   *            makes {@link #moveFile(FileObject)} delete processed files
   * @throws FileSystemException
   *             if the URI cannot be resolved
   */
  public void setDoneDirectory(String doneDirectory)
      throws FileSystemException {
    if (doneDirectory == null || "".equals(doneDirectory)) {
      this.doneDirectory = null;
    } else {
      this.doneDirectory = fsManager.resolveFile(doneDirectory);
    }
  }

  /**
   * @return true when sub folders are scanned as well
   */
  public boolean isIncludeSubDirs() {
    return includeSubDirs;
  }

  /**
   * @param includeSubDirs
   *            true to scan sub folders as well
   */
  public void setIncludeSubDirs(boolean includeSubDirs) {
    this.includeSubDirs = includeSubDirs;
  }

  /**
   * @return the minimum file age in ms; never negative
   */
  public int getMinimumFileAge() {
    return minimumFileAge;
  }

  /**
   * @param minimumFileAge
   *            minimum file age in ms; negative values are stored as 0
   */
  public void setMinimumFileAge(int minimumFileAge) {
    if (minimumFileAge < 0) {
      minimumFileAge = 0;
    }

    this.minimumFileAge = minimumFileAge;
  }
}

/**
 * The configuration information object for the VFS scanner
 *
 * @author $author$
 * @version $Revision: 1.1 $
 */
class VFSScannerInfo extends ScannerWorkerInfo {
  /**
   * @return The description
   */
  public String getDescription() {
    return I18n.get("scanner.VFSScannerInfo.description");
  }

  /**
   * @return The name
   */
  public String getName() {
    return "VFSScanner";
  }

  /**
   * @return The collection of type specific options
   */
  public Collection getTypeSpecificOptions() {
    ArrayList options = new ArrayList();

    // add specific options
    options.add(new ConfigOption(VFSScanner.IN_DIR,
        IConfigOptionType.DIRECTORY, null, true, I18n
            .get("scanner.VFSScannerInfo.option.inDirectory")));
    options.add(new ConfigOption(VFSScanner.DONE_DIR,
        IConfigOptionType.DIRECTORY, null, false, I18n
            .get("scanner.VFSScannerInfo.option.doneDirectory")));
    options.add(new ConfigOption(VFSScanner.INCLUDE_SUB_DIRS,
        IConfigOptionType.BOOLEAN, "false", false, I18n
            .get("scanner.VFSScannerInfo.option.includeSubdirs")));
    options.add(new ConfigOption(VFSScanner.FILTER_FILENAME,
        IConfigOptionType.STRING, null, false, I18n
            .get("scanner.VFSScannerInfo.option.filter")));
    options.add(new ConfigOption(VFSScanner.MINIMUM_FILE_AGE,
        IConfigOptionType.INTEGER, null, false, I18n
            .get("scanner.VFSScannerInfo.option.minimumFileAge")));

    return options;
  }
}