|
From: Dejan K. <dej...@nb...> - 2003-10-10 08:11:34
|
Applied. Thanks David!
Dejan
----- Original Message -----
From: "David Kinnvall" <dav...@al...>
To: "Babeldoc Developer List" <bab...@li...>
Cc: "Dejan Krsmanovic" <dej...@nb...>
Sent: Friday, October 10, 2003 9:39 AM
Subject: Re: [Babeldoc-devel] Scanners and filters...
> Dejan, Mike, list,
>
> Dejan Krsmanovic wrote:
>
> > Hi Mike,
> [snip]
> > Anyway, I have just copied methods from 1.0 branch to current (1.2). I have
> > no time for testing now so please check if there are some problems with it!
> > Try to add filtering functionality to MailboxScanner with this config.
>
> I have tested the methods in my setup and they seem to work just
> fine. Filtering works, both with and without a specified filter.
> What's nicer is that I was able to drop my own filtering stuff,
> since these methods cover the needs I have.
>
> I did some trivial cleanup:
>
> - Added missing messages to messages.properties (please review!)
> - Moved the patterns Hashtable to the top of the ScannerWorker
> file and added a brief javadoc about it
> - Re-ordered the methods and accompanying javadocs that got a
> bit mixed up (addFilter and acceptEntry) and added a bit more
> javadoc text explaining how the filter logic works
> - Really "new" in this patch is only my previously suggested
> addition of providing the DirectoryScanner's doneDirectory
> under the attribute "done_dir"
>
> Patch, as described above, attached.
>
> > Thanks,
> > Dejan
>
> /David
>
--------------------------------------------------------------------------------
> Index: config/i18n/messages.properties
> ===================================================================
> RCS file: /cvsroot/babeldoc/babeldoc/modules/scanner/config/i18n/messages.properties,v
> retrieving revision 1.16
> diff -u -r1.16 messages.properties
> --- config/i18n/messages.properties 1 Oct 2003 08:33:35 -0000 1.16
> +++ config/i18n/messages.properties 10 Oct 2003 07:27:59 -0000
> @@ -32,8 +32,11 @@
> scanner.ScannerThread.info.scanningPaused=Scanning paused
> scanner.ScannerThread.info.showConfig=ScannerThread initialized using config {0}
>
> -
> +#ScannerWorker
> scanner.ScannerWorker.error.message=Error during scaning
> +scanner.ScannerWorker.debug.addingFilter=Adding filter '{1}': '{0}'
> +scanner.ScannerWorker.warn.noFilter=No filter called '{0}' found!
> +scanner.ScannerWorker.debug.match=Matching '{1}': {0}
>
> #DirectoryScanner
> scanner.DirectoryScanner.error.notDir=Configuration {0} has a value {1} which is not an accessible directory!
> Index: src/com/babeldoc/scanner/ScannerWorker.java
> ===================================================================
> RCS file: /cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/ScannerWorker.java,v
> retrieving revision 1.28
> diff -u -r1.28 ScannerWorker.java
> --- src/com/babeldoc/scanner/ScannerWorker.java 9 Oct 2003 06:54:10 -0000 1.28
> +++ src/com/babeldoc/scanner/ScannerWorker.java 10 Oct 2003 07:27:59 -0000
> @@ -118,6 +118,9 @@
> /** must the documents be submitted as binaries */
> private boolean binary;
>
> + /** Used to filter what documents to accept for processing */
> + private Hashtable patterns = new Hashtable();
> +
> public static final String SCANNER_KEY = "scanner";
> public static final String SCAN_DATE_KEY = "scan_date";
> public static final String SCAN_PATH_KEY = "scan_path";
> @@ -217,15 +220,23 @@
> return this.valueObject;
> }
>
> +
> /**
> - * Does this worker accept this entry
> - *
> - * @param filter filter string
> - * @param string name to be filtered
> - *
> - * @return true if accepted - false otherwise
> + * Add named filter to use when deciding what documents
> + * to accept for processing. This method gets called by
> + * implementing subclasses to add filters specific to
> + * each scanner implementation.
> + *
> + * The filter is fetched from the scanner configuration
> + * and must be a valid Java regular expression according
> + * to the documentation for java.util.regex.Pattern
> + *
> + * An empty or non-existing pattern is interpreted and
> + * stored as ".*", i.e the match-all wildcard pattern.
> + *
> + * @param filterName Name of configured filter to add,
> + * replaces any existing pattern having the same name
> */
> - private Hashtable patterns = new Hashtable();
> protected void addFilter(String filterName) {
> String patternExp = (String) this.getInfo().getOption(filterName).getValue();
> if ((patternExp==null) || patternExp.equals("")) {
> @@ -238,16 +249,29 @@
> "scanner.ScannerWorker.debug.addingFilter",
> patternExp,
> filterName));
> - }
> + }
> patterns.put(filterName, pattern);
> }
>
> +
> + /**
> + * Does this worker accept this entry when matched against
> + * the named pattern? If the pattern name does not exist,
> + * the entry is not accepted, else the entry is accepted
> + * if the entry matches the regular expression defined by
> + * the named pattern.
> + *
> + * @param patternName Name of pattern to use when matching
> + * @param text Text to match against the pattern
> + *
> + * @return true if accepted - false otherwise
> + */
> protected boolean acceptEntry(String patternName, String text) {
> Pattern pattern = (Pattern) patterns.get(patternName);
> if (pattern == null) {
> LogService.getInstance().logDebug(
> I18n.get("scanner.ScannerWorker.warn.noFilter", patternName));
> - return false;
> + return false;
> }
> Matcher matcher = pattern.matcher(text);
> boolean result = matcher.matches();
> @@ -302,7 +326,7 @@
> this.initialize();
>
> if (getLog().isDebugEnabled()) {
> - getLog().logDebug(this.getName() + " worker initalized successfully");
> + getLog().logDebug(this.getName() + " worker initialized successfully");
> }
>
> //Set status to stopped if worker should be ignored
> Index: src/com/babeldoc/scanner/worker/DirectoryScanner.java
> ===================================================================
> RCS file: /cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/worker/DirectoryScanner.java,v
> retrieving revision 1.24
> diff -u -r1.24 DirectoryScanner.java
> --- src/com/babeldoc/scanner/worker/DirectoryScanner.java 8 Oct 2003 13:39:07 -0000 1.24
> +++ src/com/babeldoc/scanner/worker/DirectoryScanner.java 10 Oct 2003 07:27:59 -0000
> @@ -100,6 +100,12 @@
> public static final String FILTER_FILENAME = "filter";
> public static final String MINIMUM_FILE_AGE = "minimumFileAge";
>
> + /**
> + * This is used to provide information to the pipeline stages
> + * about where documents processed by this scanner are moved.
> + */
> + public static final String DONE_DIR_KEY = "done_dir";
> +
> public DirectoryScanner() {
> super(new DirectoryScannerInfo());
> }
> @@ -305,7 +311,8 @@
> PipelineDocument.getMimeTypeForFile(file.getName())),
> new NameValuePair(SCAN_DATE_KEY, Long.toString(modified)),
> new NameValuePair(SCAN_PATH_KEY, file.getCanonicalPath()),
> - new NameValuePair(FILE_NAME_KEY, file.getName())});
> + new NameValuePair(FILE_NAME_KEY, file.getName()),
> + new NameValuePair(DONE_DIR_KEY, getDoneDirectory())});
> } finally {
> fis.close();
> baos.close();
> @@ -315,7 +322,8 @@
> /**
> * Consult configuration if this file should be processed
> * or not. Current configurable constraints include the age
> - * of the file and a filename filter.
> + * of the file and a filename filter, both optional and by
> + * default all permissive.
> *
> * @param file The file to be checked against configuration
> * @return true If the file should be processed at this time
>
>
|