Author: CrawfordCurrie Date: 2007-02-27 04:30:34 -0600 (Tue, 27 Feb 2007) New Revision: 12982 Added: twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/ twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Forking.pm twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Native.pm twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/PurePerl.pm Modified: twiki/branches/MAIN/lib/TWiki.spec twiki/branches/MAIN/lib/TWiki/Configure/Types/SELECTCLASS.pm twiki/branches/MAIN/lib/TWiki/Store/RcsFile.pm twiki/branches/MAIN/test/unit/Fn_SEARCH.pm twiki/branches/MAIN/tools/native_search/cgrep.c Log: Item3443: added testcase for all search algorithms; abstracted the algorithms to be pluggable; fixed a potential issue with an empty search string in cgrep. Fixed SELECTCLASS to ignore non-options Modified: twiki/branches/MAIN/lib/TWiki/Configure/Types/SELECTCLASS.pm =================================================================== --- twiki/branches/MAIN/lib/TWiki/Configure/Types/SELECTCLASS.pm 2007-02-27 05:57:06 UTC (rev 12981) +++ twiki/branches/MAIN/lib/TWiki/Configure/Types/SELECTCLASS.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -31,8 +31,8 @@ sub prompt { my( $this, $id, $opts, $value ) = @_; my @ropts; + $opts =~ s/\s.*$//; # remove e.g. EXPERT foreach my $opt (split( /,/, $opts)) { - $opt =~ s/\s+//g; if ($opt eq 'none') { push(@ropts, 'none'); } else { @@ -49,8 +49,6 @@ sub findClasses { my ($this, $pattern) = @_; - $pattern =~ s/^\s+//; - $pattern =~ s/\s+$//; $pattern =~ s/\*/.*/g; my @path = split(/::/, $pattern); Modified: twiki/branches/MAIN/lib/TWiki/Store/RcsFile.pm =================================================================== --- twiki/branches/MAIN/lib/TWiki/Store/RcsFile.pm 2007-02-27 05:57:06 UTC (rev 12981) +++ twiki/branches/MAIN/lib/TWiki/Store/RcsFile.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -327,101 +327,17 @@ sub searchInWebContent { my( $this, $searchString, $topics, $options ) = @_; ASSERT(defined $options) if DEBUG; - my $type = $options->{type} || ''; my $sDir = $TWiki::cfg{DataDir}.'/'.$this->{web}.'/'; - my $matches = ''; - my %seen; - if ($TWiki::cfg{RCS}{SearchAlgorithm}) { - # WikiRing native search - if ($TWiki::cfg{RCS}{SearchAlgorithm} eq 'Native') { - eval 'use NativeTWikiSearch'; - die $@ if $@; - my @fs; - push(@fs, "-i") unless $options->{casesensitive}; - push(@fs, "-l") if $options->{files_without_match}; - push(@fs, $searchString); - push(@fs, map { "$sDir/$_.txt" } @$topics); - my $matches = NativeTWikiSearch::cgrep(\@fs); - if (defined($matches)) { - for (@$matches) { - # Note use of / and \ as dir separators, to support - # Winblows - if (/([^\/\\]*)\.txt(:(.*))?$/) { - push( @{$seen{$1}}, $3 ); - } - } - } - return \%seen; - } - - if ($TWiki::cfg{RCS}{SearchAlgorithm} eq 'PurePerl') { - # Pure-perl grep - local $/ = "\n"; - if ($type eq 'regex') { - $searchString =~ s!/!\\/!g; - } else { - $searchString =~ s/(\W)/\\$1/g; - } - # Convert GNU grep \< \> syntax to \b - $searchString =~ s/(?<!\\)\\[<>]/\\b/g; - my $match_code = "return \$_[0] =~ m/$searchString/o"; - $match_code .= 'i' unless ($options->{casesensitive}); - my $doMatch = eval "sub { $match_code }"; - FILE: - foreach my $file ( @$topics ) { - next unless open(FILE, "<$sDir/$file.txt"); - while (my $line = <FILE>) { - if (&$doMatch($line)) { - chomp($line); - push( @{$seen{$file}}, $line ); - if ($options->{files_without_match}) { - close(FILE); - next FILE; - } - } - } - close(FILE); - } - return \%seen; - } + unless ($this->{searchFn}) { + eval "require $TWiki::cfg{RCS}{SearchAlgorithm}"; + die "Bad {RCS}{SearchAlgorithm}; suggest you run configure and select a different algorithm\n$@" if $@; + $this->{searchFn} = $TWiki::cfg{RCS}{SearchAlgorithm}.'::search'; } - - # Default (Forking) search - - # I18N: 'grep' must use locales if needed, - # for case-insensitive searching. See TWiki::setupLocale. - my $program = ''; - # FIXME: For Cygwin grep, do something about -E and -F switches - # - best to strip off any switches after first space in - # EgrepCmd etc and apply those as argument 1. - if( $type eq 'regex' ) { - $program = $TWiki::cfg{RCS}{EgrepCmd}; - } else { - $program = $TWiki::cfg{RCS}{FgrepCmd}; - } - - $program =~ s/%CS{(.*?)\|(.*?)}%/$options->{casesensitive}?$1:$2/ge; - $program =~ s/%DET{(.*?)\|(.*?)}%/$options->{files_without_match}?$2:$1/ge; - # process topics in sets, fix for Codev.ArgumentListIsTooLongForSearch - my $maxTopicsInSet = 512; # max number of topics for a grep call - my @take = @$topics; - my @set = splice( @take, 0, $maxTopicsInSet ); - my $sandbox = $this->{session}->{sandbox}; - while( @set ) { - @set = map { "$sDir/$_.txt" } @set; - my ($m, $exit ) = $sandbox->sysCommand( - $program, - TOKEN => $searchString, - FILES => \@set); - $matches .= $m; - @set = splice( @take, 0, $maxTopicsInSet ); - } - # Note use of / and \ as dir separators, to support - # Winblows - $matches =~ s/([^\/\\]*)\.txt(:(.*))?$/push( @{$seen{$1}}, $3 ); ''/gem; - - return \%seen; + no strict 'refs'; + return &{$this->{searchFn}}($searchString, $topics, $options, + $sDir, $this->{session}->{sandbox}); + use strict 'refs'; } =pod Added: twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Forking.pm =================================================================== --- twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Forking.pm (rev 0) +++ twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Forking.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -0,0 +1,81 @@ +# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/ +# +# Copyright (C) 2002 John Talintyre, joh...@bt... +# Copyright (C) 2002-2007 Peter Thoeny, pe...@th... +# and TWiki Contributors. All Rights Reserved. TWiki Contributors +# are listed in the AUTHORS file in the root of this distribution. +# NOTE: Please extend that file, not this notice. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. For +# more details read LICENSE in the root of this distribution. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# As per the GPL, removal of this notice is prohibited. + +package TWiki::Store::SearchAlgorithms::Forking; + +use strict; + +=pod + +---+ package TWiki::Store::SearchAlgorithms::Forking + +Forking implementation of the RCS cache search. + +---++ search($searchString, $topics, $options, $sDir) -> \%seen +Search .txt files in $dir for $string. See RcsFile::searchInWebContent +for details. + +=cut + +sub search { + my( $searchString, $topics, $options, $sDir, $sandbox ) = @_; + + # Default (Forking) search + + # I18N: 'grep' must use locales if needed, + # for case-insensitive searching. + my $program = ''; + + # FIXME: For Cygwin grep, do something about -E and -F switches + # - best to strip off any switches after first space in + # EgrepCmd etc and apply those as argument 1. + if( $options->{type} && $options->{type} eq 'regex' ) { + $program = $TWiki::cfg{RCS}{EgrepCmd}; + } else { + $program = $TWiki::cfg{RCS}{FgrepCmd}; + } + + $program =~ s/%CS{(.*?)\|(.*?)}%/$options->{casesensitive}?$1:$2/ge; + $program =~ s/%DET{(.*?)\|(.*?)}%/$options->{files_without_match}?$2:$1/ge; + # process topics in sets, fix for Codev.ArgumentListIsTooLongForSearch + my $maxTopicsInSet = 512; # max number of topics for a grep call + my @take = @$topics; + my @set = splice( @take, 0, $maxTopicsInSet ); + my $matches = ''; + + while( @set ) { + @set = map { "$sDir/$_.txt" } @set; + my ($m, $exit ) = $sandbox->sysCommand( + $program, + TOKEN => $searchString, + FILES => \@set); + throw Error::Simple("$program failed: $m") if $exit; + $matches .= $m; + @set = splice( @take, 0, $maxTopicsInSet ); + } + my %seen; + # Note use of / and \ as dir separators, to support + # Winblows + $matches =~ s/([^\/\\]*)\.txt(:(.*))?$/push( @{$seen{$1}}, $3 ); ''/gem; + + return \%seen; +} + +1; Added: twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Native.pm =================================================================== --- twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Native.pm (rev 0) +++ twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/Native.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -0,0 +1,62 @@ +# +# Copyright (C) 2007 TWiki Contributors. All Rights Reserved. +# TWiki Contributors are listed in the AUTHORS file in the root +# of this distribution. NOTE: Please extend that file, not this notice. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. For +# more details read LICENSE in the root of this distribution. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# As per the GPL, removal of this notice is prohibited. +# +package TWiki::Store::SearchAlgorithms::Native; + +use NativeTWikiSearch; + +=pod + +---+ package TWiki::Store::SearchAlgorithms::Native + +Native implementation of the RCS cache search. Requires tools/native_search +to be built and installed. + +---++ search($searchString, $topics, $options, $sDir) -> \%seen +Search .txt files in $dir for $string. See RcsFile::searchInWebContent +for details. + +Rude and crude, this makes no attempt to handle UTF-8. + +=cut + +sub search { + my ($searchString, $topics, $options, $sDir) = @_; + if (!$options->{type} || $options->{type} ne 'regex') { + # Escape non-word chars in search string for plain text search + $searchString =~ s/(\W)/\\$1/g; + } + my @fs; + push(@fs, '-i') unless $options->{casesensitive}; + push(@fs, '-l') if $options->{files_without_match}; + push(@fs, $searchString); + push(@fs, map { "$sDir/$_.txt" } @$topics); + my $matches = NativeTWikiSearch::cgrep(\@fs); + my %seen; + if (defined($matches)) { + for (@$matches) { + # Note use of / and \ as dir separators, to support + # Winblows + if (/([^\/\\]*)\.txt(:(.*))?$/) { + push( @{$seen{$1}}, $3 ); + } + } + } + return \%seen; +} + +1; Added: twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/PurePerl.pm =================================================================== --- twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/PurePerl.pm (rev 0) +++ twiki/branches/MAIN/lib/TWiki/Store/SearchAlgorithms/PurePerl.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -0,0 +1,68 @@ +# +# Copyright (C) 2007 TWiki Contributors. All Rights Reserved. +# TWiki Contributors are listed in the AUTHORS file in the root +# of this distribution. NOTE: Please extend that file, not this notice. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. For +# more details read LICENSE in the root of this distribution. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# As per the GPL, removal of this notice is prohibited. +# + +package TWiki::Store::SearchAlgorithms::PurePerl; + +=pod + +---+ package TWiki::Store::SearchAlgorithms::PurePerl + +Pure perl implementation of the RCS cache search. + +---++ search($searchString, $topics, $options, $sDir) -> \%seen +Search .txt files in $dir for $string. See RcsFile::searchInWebContent +for details. + +=cut + +sub search { + my ($searchString, $topics, $options, $sDir) = @_; + local $/ = "\n"; + my %seen; + if ($options->{type} && $options->{type} eq 'regex') { + # Escape /, used as delimiter. This also blocks any attempt to use + # the search string to execute programs on the server. + $searchString =~ s!/!\\/!g; + } else { + # Escape non-word chars in search string for plain text search + $searchString =~ s/(\W)/\\$1/g; + } + # Convert GNU grep \< \> syntax to \b + $searchString =~ s/(?<!\\)\\[<>]/\\b/g; + my $match_code = "return \$_[0] =~ m/$searchString/o"; + $match_code .= 'i' unless ($options->{casesensitive}); + my $doMatch = eval "sub { $match_code }"; + FILE: + foreach my $file ( @$topics ) { + next unless open(FILE, "<$sDir/$file.txt"); + while (my $line = <FILE>) { + if (&$doMatch($line)) { + chomp($line); + push( @{$seen{$file}}, $line ); + if ($options->{files_without_match}) { + close(FILE); + next FILE; + } + } + } + close(FILE); + } + return \%seen; +} + +1; Modified: twiki/branches/MAIN/lib/TWiki.spec =================================================================== --- twiki/branches/MAIN/lib/TWiki.spec 2007-02-27 05:57:06 UTC (rev 12981) +++ twiki/branches/MAIN/lib/TWiki.spec 2007-02-27 10:30:34 UTC (rev 12982) @@ -892,22 +892,22 @@ $TWiki::cfg{RCS}{delRevCmd} = "$TWiki::cfg{RCS}{BinDir}/rcs $TWiki::cfg{RCS}{ExtOption} -o%REVISION|N% %FILENAME|F%"; -# **SELECT Forking,Native,PurePerl EXPERT** -# TWiki has three built-in search algorithms +# **SELECTCLASS TWiki::Store::SearchAlgorithms::* ** +# TWiki RCS has three built-in search algorithms # <ol><li> The default 'Forking' algorithm, which forks a subprocess that # runs a 'grep' command, # <li>the 'Native' implementation, which uses a search implemented in a -# special library, </li> +# special library (see http://twiki.org/cgi-bin/view/Codev/NativeSearch), </li> # </li><li> the 'PurePerl' implementation, which is written in Perl and # usually only used as a last resort.</li></ol> # Normally you will be just fine with the 'Forking' implementation. However # if you find searches run very slowly, you may want to try a different -# algorithm. -$TWiki::cfg{RCS}{SearchAlgorithm} = 'Forking'; +# algorithm, which may work better on your configuration. +$TWiki::cfg{RCS}{SearchAlgorithm} = 'TWiki::Store::SearchAlgorithms::Forking'; # **COMMAND EXPERT** # Full path to GNU-compatible egrep program. This is used for searching when -# {SearchAlgorithm} is 'Forking'. +# {SearchAlgorithm} is 'TWiki::Store::SearchAlgorithms::Forking'. # %CS{|-i}% will be expanded # to -i for case-sensitive search or to the empty string otherwise. # Similarly for %DET, which controls whether matching lines are required. @@ -916,7 +916,7 @@ # **COMMAND EXPERT** # Full path to GNU-compatible fgrep program. This is used for searching when -# {SearchAlgorithm} is 'Forking'. +# {SearchAlgorithm} is 'TWiki::Store::SearchAlgorithms::Forking'. $TWiki::cfg{RCS}{FgrepCmd} = "/bin/fgrep" . ' %CS{|-i}% %DET{|-l}% -H -- %TOKEN|U% %FILES|F%'; # **PATH** Modified: twiki/branches/MAIN/test/unit/Fn_SEARCH.pm =================================================================== --- twiki/branches/MAIN/test/unit/Fn_SEARCH.pm 2007-02-27 05:57:06 UTC (rev 12981) +++ twiki/branches/MAIN/test/unit/Fn_SEARCH.pm 2007-02-27 10:30:34 UTC (rev 12982) @@ -1,6 +1,7 @@ use strict; # tests for the correct expansion of SEARCH +# SMELL: this test is pathetic, becase SEARCH has dozens of untested modes package Fn_SEARCH; @@ -14,25 +15,84 @@ return $self; } -sub test_SEARCH_Item2625 { +sub set_up { my $this = shift; + $this->SUPER::set_up(); $this->{twiki}->{store}->saveTopic( $this->{twiki}->{user}, $this->{test_web}, - 'OkTopic', "BLEEGLE"); + 'OkTopic', "BLEEGLE blah/matchme.blah"); $this->{twiki}->{store}->saveTopic( $this->{twiki}->{user}, $this->{test_web}, - 'Ok-Topic', "BLEEGLE"); + 'Ok-Topic', "BLEEGLE dontmatchme.blah"); $this->{twiki}->{store}->saveTopic( $this->{twiki}->{user}, $this->{test_web}, - 'Ok+Topic', "BLEEGLE"); + 'Ok+Topic', "BLEEGLE dont.matchmeblah"); +} + +# Add tests in this function; it is invoked for each algorithm +sub std_tests { + my $this = shift; + my $result = $this->{twiki}->handleCommonTags( '%SEARCH{"BLEEGLE" topic="Ok-Topic,Ok+Topic,OkTopic" nonoise="on" format="$topic"}%', $this->{test_web}, $this->{test_topic}); $this->assert_matches(qr/OkTopic/, $result); $this->assert_matches(qr/Ok-Topic/, $result); - $this->assert_matches(qr/Ok+Topic/, $result); + $this->assert_matches(qr/Ok\+Topic/, $result); + + # Test regex with \< and \>, used in rename searches + $result = $this->{twiki}->handleCommonTags( + '%SEARCH{"\<matc[h]me\>" type="regex" topic="Ok-Topic,Ok+Topic,OkTopic" nonoise="on" format="$topic"}%', + $this->{test_web}, $this->{test_topic}); + + $this->assert_matches(qr/OkTopic/, $result); + $this->assert_does_not_match(qr/Ok-Topic/, $result); + $this->assert_does_not_match(qr/Ok\+Topic/, $result); + + # Test topic name search + $result = $this->{twiki}->handleCommonTags( + '%SEARCH{"Ok.*" type="regex" scope="topic" nonoise="on" format="$topic"}%', + $this->{test_web}, $this->{test_topic}); + + $this->assert_matches(qr/OkTopic/, $result); + $this->assert_matches(qr/Ok-Topic/, $result); + $this->assert_matches(qr/Ok\+Topic/, $result); } +sub test_SEARCH_Forking { + my $this = shift; + + $TWiki::cfg{RCS}{SearchAlgorithm} = + "TWiki::Store::SearchAlgorithms::Forking"; + + $this->std_tests(); +} + +sub test_SEARCH_PurePerl { + my $this = shift; + + $TWiki::cfg{RCS}{SearchAlgorithm} = + "TWiki::Store::SearchAlgorithms::PurePerl"; + + $this->std_tests(); +} + +sub test_SEARCH_Native { + my $this = shift; + + # Need to try all three of the default algorithms + eval "require TWiki::Store::SearchAlgorithms::Native"; + if ($@) { + print STDERR "WARNING: unable to test native search, extension module may not be installed: $@\n"; + return; + } + + $TWiki::cfg{RCS}{SearchAlgorithm} = + "TWiki::Store::SearchAlgorithms::Native"; + + $this->std_tests(); +} + 1; Modified: twiki/branches/MAIN/tools/native_search/cgrep.c =================================================================== --- twiki/branches/MAIN/tools/native_search/cgrep.c 2007-02-27 05:57:06 UTC (rev 12981) +++ twiki/branches/MAIN/tools/native_search/cgrep.c 2007-02-27 10:30:34 UTC (rev 12982) @@ -83,10 +83,12 @@ } else { /* Convert \< and \> to \b in the pattern. GNU grep supports them, but pcre doesn't :-( */ - for (linebuf = arg; *linebuf; linebuf++) { - if (*linebuf == '\\' && *(linebuf-1) != '\\' && - *(linebuf+1) == '<' || *(linebuf+1) == '>') - *(linebuf+1) = 'b'; + if (*arg) { + for (linebuf = arg + 1; *linebuf; linebuf++) { + if (*linebuf == '\\' && *(linebuf-1) != '\\' && + *(linebuf+1) == '<' || *(linebuf+1) == '>') + *(linebuf+1) = 'b'; + } } if (!(pattern = pcre_compile(arg, reflags, &err, &errPos, NULL))) { warn(err); |