|
From: Dejan K. <dej...@ya...> - 2003-10-08 13:19:13
|
Hi David,
I have just realized that filtering is not implemented
in the new scanner version. It was implemented in the 1.0
release, but since I rewrote the whole scanner for 1.1
I obviously forgot to implement it.
Have you checked if your code looks like code in 1.0?
I will review your code as soon as possible. And
don't worry, my implementation also required 1.4
since it used the java.util.regex package. There are some
other pieces of Babeldoc that require it...
Dejan
P.S. Are you sure you don't want to become a committer?
--- David Kinnvall <dav...@al...> wrote:
> Hi people,
>
> I noticed that the filtering in DirectoryScanner,
> and the
> underlying ScannerWorker, was not actually
> implemented, and
> since I needed it I added some code that seems to
> work:
>
> ScannerWorker:
> - Added a private List of filters
> - Did the TODO in addFilter to feed the above List
> - Added a private method checkFilter(filter, string)
> - Added code to acceptEntry that checks the supplied
> string against the supplied filter and any already
> configured filters in the above List
>
> DirectoryScanner:
> - Added a private String for the (configured) filter
> - Added code to get the filter string if configured
> - Added code to acceptFile to use the configured
> filter
> and actually do some filtering using the code now
> in
> ScannerWorker
>
> The logic is: If no filter is specified and no
> filters
> are present in the List, the string matches,
> regardless.
> If filters are present, the string matches if it
> matches
> any of the filters. If a filter is also supplied in
> the
> method call, that filter is also checked, but only
> if it
> is not already present in the List.
>
> I have tested all changes and they seem to work, and
> they
> are fully backwards compatible, i.e no changes need
> to be
> done to existing configurations - they work as
> before.
>
> Example configuration snippet for a
> DirectoryScanner:
>
> <scanner-name>.filter=.*\.xml
>
> The above filter (obviously) matches files named
> *.xml and
> nothing else. It uses the regular expression support
> in the
> String class. So that might require 1.4-level Java
> support?
>
> Patch attached. Please review, and apply if correct
> && useful.
>
> Regards,
>
> David Kinnvall
> > Index: com/babeldoc/scanner/ScannerWorker.java
>
===================================================================
> RCS file:
>
/cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/ScannerWorker.java,v
> retrieving revision 1.27
> diff -u -r1.27 ScannerWorker.java
> --- com/babeldoc/scanner/ScannerWorker.java 30 Sep
> 2003 14:37:21 -0000 1.27
> +++ com/babeldoc/scanner/ScannerWorker.java 8 Oct
> 2003 09:54:26 -0000
> @@ -65,6 +65,9 @@
> */
> package com.babeldoc.scanner;
>
> +import java.util.ArrayList;
> +import java.util.Iterator;
> +import java.util.List;
> import java.util.Map;
> import java.util.HashMap;
>
> @@ -114,6 +117,18 @@
> /** must the documents be submitted as binaries
> */
> private boolean binary;
>
> + /** List of filters to apply to document names.
> + * If there are any filters at least one must
> + * match for each document to be processed. If
> + * no filters are present every document will
> + * be processed. Note: An additional filter can
> + * also be provided manually through the method
> + * acceptEntry, and that filter, if specified,
> + * will be check in addition to the configured
> + * ones.
> + */
> + private List filters = new ArrayList();
> +
> public static final String SCANNER_KEY =
> "scanner";
> public static final String SCAN_DATE_KEY =
> "scan_date";
> public static final String SCAN_PATH_KEY =
> "scan_path";
> @@ -214,7 +229,11 @@
> }
>
> /**
> - * Does this worker accept this entry
> + * Does this worker accept this entry? The string
> + * is matched against the specified filter as
> well
> + * as against any already configured filters. The
> + * matching will result in true if any match is
> + * found or if there are no filters.
> *
> * @param filter filter string
> * @param string name to be filtered
> @@ -222,16 +241,63 @@
> * @return true if accepted - false otherwise
> */
> public boolean acceptEntry(String filter, String
> string) {
> - return true;
> + if(filters.isEmpty() && (filter == null ||
> filter == "")) {
> + return true;
> + } else {
> + if(!filters.isEmpty()) {
> + Iterator i = filters.iterator();
> + while(i.hasNext()) {
> + if(checkFilter((String)i.next(),
> string)) {
> + return true;
> + }
> + }
> + }
> + if(filter != null && filter != "") {
> + // Don't check filter again, if it's in
> filters.
> + if(filters.isEmpty() ||
> !filters.contains(filter)) {
> + if(checkFilter(filter, string)) {
> + return true;
> + }
> + }
> + }
> + return false;
> + }
> + }
> +
> + /**
> + * Check a filter against a string. If the filter
> is
> + * empty it is considered a match. If both are
> not
> + * empty and the string matches the regular
> expression
> + * of the filter it is considered a match.
> Otherwise
> + * it is considered NOT to be a match.
> + *
> + * @param filter Filter string to match string
> against
> + * @param string String to match against filter
> string
> + * @return boolean True if string matches filter
> + */
> + private boolean checkFilter(String filter, String
> string) {
> + if(filter == null || filter == "") {
> + return true;
> + } else {
> + if(string.matches(filter)) {
> + return true;
> + } else {
> + return false;
> + }
> + }
> }
>
> /**
> - * Add a filter
> + * Add a filter, unless it is already present.
> *
> * @param filter to be added
> */
> public void addFilter(String filter) {
> - //TODO: Implement this
> + synchronized(filters) {
> + if(filter != null &&
> !filters.contains(filter)) {
> + filters.add(filter);
> + }
> + }
> }
>
> /**
> Index:
> com/babeldoc/scanner/worker/DirectoryScanner.java
>
===================================================================
> RCS file:
>
/cvsroot/babeldoc/babeldoc/modules/scanner/src/com/babeldoc/scanner/worker/DirectoryScanner.java,v
> retrieving revision 1.23
> diff -u -r1.23 DirectoryScanner.java
> ---
> com/babeldoc/scanner/worker/DirectoryScanner.java 3
> Oct 2003 13:08:40 -0000 1.23
> +++
> com/babeldoc/scanner/worker/DirectoryScanner.java 8
> Oct 2003 09:54:26 -0000
> @@ -119,6 +119,15 @@
> */
> private int minimumFileAge = 0;
>
> + /** Filename filter, as regular expression, to
> apply
> + * to all scanned files. If not defined it
> will have
> + * no effect, i.e all files will match. If
> defined,
> + * only files matching the regular expression
> will
> + * be processed.
> + */
> + private String filter = null;
> +
> +
> /**
> * This method will scan for new documents. It
> will queue documents by
> * itself, so it will return null no matter how
> many documents found!
> @@ -179,14 +188,21 @@
> + getMinimumFileAge() + " ms");
> }
>
> - //Add filename filter if exist
> - addFilter(FILTER_FILENAME);
> +
>
setFilter(this.getInfo().getStrValue(FILTER_FILENAME));
> +
> + if(getFilter() != null && getFilter() !=
> "") {
> +
> LogService.getInstance().logInfo("Filename filter: "
> + + getFilter());
> + addFilter(getFilter());
> + }
> +
> }
>
> /**
> * release the held resource. Do nothing - no
> held resources.
> */
> public void relinquishResources() {
> + // noop
> }
>
> /**
> @@ -321,7 +337,7 @@
>
=== message truncated ===
__________________________________
Do you Yahoo!?
The New Yahoo! Shopping - with improved product search
http://shopping.yahoo.com
|