From: <de...@de...> - 2007-01-12 15:43:51
|
Author: MichaelDaum Date: 2007-01-12 09:43:39 -0600 (Fri, 12 Jan 2007) New Revision: 12511 Modified: twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/data/TWiki/AntiWikiSpamPlugin.txt twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/lib/TWiki/Plugins/AntiWikiSpamPlugin.pm Log: Item3440: added a wrapper around TWiki::Net::getUrl() to cope with different TWiki versions. There really is a TWiki::Func::getUrl() missing. Some more code cleanup. Modified: twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/data/TWiki/AntiWikiSpamPlugin.txt =================================================================== --- twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/data/TWiki/AntiWikiSpamPlugin.txt 2007-01-12 14:49:03 UTC (rev 12510) +++ twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/data/TWiki/AntiWikiSpamPlugin.txt 2007-01-12 15:43:39 UTC (rev 12511) @@ -2,39 +2,43 @@ ---+!! <nop>%TOPIC% -This plugin attempts to reduce the instance of Wiki Spam by using the [[http://moinmoin.wikiwikiweb.de/][MoinMoin]] [[http://moinmoin.wikiwikiweb.de/AntiSpamGlobalSolution][AntiSpamGlobalSolution]] regex's. -Anytime it detects that a saved page contains a string in the regex list, it _only_ refuses to save it. +This plugin attempts to reduce the instance of Wiki Spam by using the +[[http://moinmoin.wikiwikiweb.de/][MoinMoin]] +[[http://moinmoin.wikiwikiweb.de/AntiSpamGlobalSolution][AntiSpamGlobalSolution]] +regex's. Anytime it detects that a saved page contains a string in the regex +list, it _only_ refuses to save it. -*major premise is not to create _any unnecessary load on the server and not to use excess server resources by holding connections open* +*Major premise is not to create any unnecessary load on the server and not to use excess server resources by holding connections open.* -All its processing is _only_ done on save, or on attach. (TWiki:Plugins/BlackListPlugin does alot of processing on _every_ twiki script, including view) +All its processing is _only_ done on save, or on attach. +(TWiki:Plugins/BlackListPlugin does alot of processing on _every_ twiki script, +including view) -It uses a timeout on save to check if the list has changed, so if there are no saves, there is no un-needed network traffic. +It uses a timeout on save to check if the list has changed, so if there are no +saves, there is no un-needed network traffic. -to manually update the list, click [[%SCRIPTURL%/rest/%TOPIC%/forceUpdate][here]] +To manually update the list, click [[%SCRIPTURLPATH{"rest"}%/%TOPIC%/forceUpdate][here]] %TOC% ------- ---++ Plugin Settings Plugin settings are stored as preferences variables. To reference a plugin setting write ==%<nop><plugin>_<setting>%==, i.e. ==%<nop>INTERWIKIPLUGIN_SHORTDESCRIPTION%== * One line description, is shown in the %TWIKIWEB%.TextFormattingRules topic: - * Set SHORTDESCRIPTION = prevents registered Wiki Spam from being saved + * Set SHORTDESCRIPTION = prevents registered Wiki Spam from being saved - * Debug plugin: (See output in =data/debug.txt=) - * Set DEBUG = 0 + * where to get the regex list to check for + * Set ANTISPAMREGEXLISTURL = http://arch.thinkmo.de/cgi-bin/spam-merge - * where to get the regex list to check for - * Set ANTISPAMREGEXLISTURL = http://arch.thinkmo.de/cgi-bin/spam-merge * time in minutes between attempts to get a new version of the list - * Set GETLISTTIMEOUT = 60 + * Set GETLISTTIMEOUT = 60 + * Local spam list (checked before the larger shared one) - * Set LOCALANTISPAMREGEXLISTTOPIC = %TWIKIWEB%.LocalAntiWikiSpamPluginList + * Set LOCALANTISPAMREGEXLISTTOPIC = %TWIKIWEB%.LocalAntiWikiSpamPluginList - * to ensure that only TWikiAdmins can make changes here - * Set ALLOWTOPICCHANGE = TWikiAdminGroup + * to ensure that only <nop>TWikiAdmins can make changes here + * Set ALLOWTOPICCHANGE = %MAINWEB%.TWikiAdminGroup @@ -42,14 +46,14 @@ __Note:__ You do not need to install anything on the browser to use this plugin. The following instructions are for the administrator who installs the plugin on the server where TWiki is running. - * Download the ZIP file from the Plugin web (see below) - * Unzip ==%TOPIC%.zip== in your twiki installation directory. Content: + * Download the ZIP file from the Plugin web (see below) + * Unzip ==%TOPIC%.zip== in your twiki installation directory. Content: | *File:* | *Description:* | | ==data/TWiki/%TOPIC%.txt== | Plugin topic | | ==data/TWiki/%TOPIC%.txt,v== | Plugin topic repository | | ==lib/TWiki/Plugins/%TOPIC%.pm== | Plugin Perl module | - * (Dakar) Visit =configure= in your TWiki installation, and enable the plugin in the {Plugins} section. - * Test if the installation was successful: + * (Dakar) Visit =configure= in your TWiki installation, and enable the plugin in the {Plugins} section. + * Test if the installation was successful: ---++ Plugin Info Modified: twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/lib/TWiki/Plugins/AntiWikiSpamPlugin.pm =================================================================== --- twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/lib/TWiki/Plugins/AntiWikiSpamPlugin.pm 2007-01-12 14:49:03 UTC (rev 12510) +++ twiki/branches/MAIN/twikiplugins/AntiWikiSpamPlugin/lib/TWiki/Plugins/AntiWikiSpamPlugin.pm 2007-01-12 15:43:39 UTC (rev 12511) @@ -42,6 +42,8 @@ $pluginName = 'AntiWikiSpamPlugin'; # Name of this Plugin +$debug = 0; # toggle me + =pod ---++ initPlugin($topic, $web, $user, $installWeb) -> $boolean @@ -72,6 +74,18 @@ =pod +---++ writeDebug($text) + +write debug output if the debug flag is set + +=cut + +sub writeDebug { + TWiki::Func::writeDebug( "- $pluginName - ".$_[0]) if $debug; +} + +=pod + ---++ beforeSaveHandler($text, $topic, $web, $meta ) * =$text= - text _with embedded meta-data tags_ * =$topic= - the name of the topic in the current CGI query @@ -88,18 +102,19 @@ # do not uncomment, use $_[0], $_[1]... instead ### my ( $text, $topic, $web ) = @_; - TWiki::Func::writeDebug( "- ${pluginName}::beforeSaveHandler( $_[2].$_[1] )" ) if $debug; + writeDebug("beforeSaveHandler( $_[2].$_[1] )"); #do localspamlist first my $regexWeb; my $regexTopic = TWiki::Func::getPluginPreferencesValue( 'LOCALANTISPAMREGEXLISTTOPIC' ); - ($regexWeb, $regexTopic) = TWiki::Func::normalizeWebTopicName('TWiki', $regexTopic); + my $twikiWeb = TWiki::Func::getTwikiWebname(); + ($regexWeb, $regexTopic) = TWiki::Func::normalizeWebTopicName($twikiWeb, $regexTopic); if (TWiki::Func::topicExists($regexWeb, $regexTopic) ) { if (($_[1] eq $regexTopic) && ($_[2] eq $regexWeb)) { return; #don't check the anti-spam topic } my ( $meta, $regexs) = TWiki::Func::readTopic($regexWeb, $regexTopic); - checkTextUsingTopic($_[0], $regexs); + checkTextUsingTopic($_[0], $regexs, $_[2], $_[1]); } my $timesUp; @@ -116,7 +131,7 @@ } #use the share spam regexs my $regexs = readWorkFile(${pluginName}.'_regexs'); - checkTextUsingTopic($_[0], $regexs); + checkTextUsingTopic($_[0], $regexs, $_[2], $_[1]); } sub beforeAttachmentSaveHandler @@ -130,9 +145,9 @@ #from BlackListPlugin # check for evil eval() spam in <script> if( $text =~ /<script.*?eval *\(.*?<\/script>/gis ) { #TODO: there's got to be a better way to do this. + TWiki::Func::writeWarning("detected possible javascript exploit at attachment in in $_[2].$_[1]"); throw TWiki::OopsException( 'attention', def=>'attach_error', - params => 'the attach has been rejected by the %TWIKIWEB%.'. - ${pluginName}.' as it contains a possible javascript eval exploit'); + params => 'The attachment has been rejected as it contains a possible javascript eval exploit.'); } beforeSaveHandler($text, $_[1], $_[2]); @@ -140,23 +155,25 @@ sub checkTextUsingTopic { -my ($text, $regexs) = @_; + #my ($text, $regexs, $web, $topic) = @_; - TWiki::Func::writeDebug( "- ${pluginName}::checkTextUsingTopic( )" ) if $debug; + my $web = $_[2]; + my $topic = $_[3]; + writeDebug("checkTextUsingTopic( )"); + #load text as a set of regex's, and eval - foreach my $regexLine (split(/\n/, $regexs)) { + foreach my $regexLine (split(/\n/, $_[1])) { $regexLine =~ /([^#]*)\s*#?/; my $regex = $1; $regex =~ s/^\s+//; $regex =~ s/\s+$//; if ($regex ne '') { if ( $_[0] =~ /$regex/i ) { - TWiki::Func::writeWarning('MATCH]]'.$regex.'[['); -#TODO: make this a nicer error, or make its own template + TWiki::Func::writeWarning("detected spam at $web.$topic (regex=$regex)"); + # TODO: make this a nicer error, or make its own template throw TWiki::OopsException( 'attention', def=>'save_error', - params => 'the topic save has been rejected by the %TWIKIWEB%.'. - ${pluginName}.' as it matches content that may be WikiSpam ('.$regex.')'); + params => "The topic <nop>$web.$topic has been rejected as it may contain spam."); } } } @@ -164,7 +181,7 @@ sub getSharedSpamData { - TWiki::Func::writeDebug( "- ${pluginName}::getSharedSpamData( )" ) if $debug; + writeDebug("getSharedSpamData( )"); my $getSharedSpamLock = readWorkFile(${pluginName}.'_lock'); @@ -191,11 +208,11 @@ =cut sub forceUpdate { - TWiki::Func::writeDebug(${pluginName}.' about to forceUpdate') if $debug; + writeDebug('about to forceUpdate'); getSharedSpamData(); - TWiki::Func::writeDebug(${pluginName}.' forceUpdate complete') if $debug; + writeDebug('forceUpdate complete'); - return ${pluginName}.' SharedSpamList forceUpdate complete '; + return ${pluginName}.': SharedSpamList forceUpdate complete '; } sub saveWorkFile($$) { @@ -221,53 +238,65 @@ #simplified version of INCLUDE, why we have policy mixed in with implementation is bejond me sub includeUrl($) { my $theUrl = shift; - my $text; + my $text = ''; my $host = ''; my $port = 80; my $path = ''; my $user = ''; my $pass = ''; + my $protocol = 'http'; - if( $theUrl =~ /http\:\/\/(.+)\:(.+)\@([^\:]+)\:([0-9]+)(\/.*)/ ) { - ( $user, $pass, $host, $port, $path ) = ( $1, $2, $3, $4, $5 ); - } elsif( $theUrl =~ /http\:\/\/(.+)\:(.+)\@([^\/]+)(\/.*)/ ) { - ( $user, $pass, $host, $path ) = ( $1, $2, $3, $4 ); - } elsif( $theUrl =~ /http\:\/\/([^\:]+)\:([0-9]+)(\/.*)/ ) { - ( $host, $port, $path ) = ( $1, $2, $3 ); - } elsif( $theUrl =~ /http\:\/\/([^\/]+)(\/.*)/ ) { - ( $host, $path ) = ( $1, $2 ); + + if( $theUrl =~ /(https?)\:\/\/(.+)\:(.+)\@([^\:]+)\:([0-9]+)(\/.*)/ ) { + ( $protocol, $user, $pass, $host, $port, $path ) = ( $1, $2, $3, $4, $5, $6 ); + } elsif( $theUrl =~ /(https?)\:\/\/(.+)\:(.+)\@([^\/]+)(\/.*)/ ) { + ( $protocol, $user, $pass, $host, $path ) = ( $1, $2, $3, $4, $5 ); + } elsif( $theUrl =~ /(https?)\:\/\/([^\:]+)\:([0-9]+)(\/.*)/ ) { + ( $protocol, $host, $port, $path ) = ( $1, $2, $3, $4 ); + } elsif( $theUrl =~ /(https?)\:\/\/([^\/]+)(\/.*)/ ) { + ( $protocol, $host, $path ) = ( $1, $2, $3 ); } else { # $text = TWiki::Plugins::SESSION->inlineAlert( 'alerts', 'bad_protocol', $theUrl ); return $text; } try { - $text = $TWiki::Plugins::SESSION->{net}->getUrl( $host, $port, $path, $user, $pass ); + $text = getUrl( $protocol, $host, $port, $path, $user, $pass ); $text =~ s/\r\n/\n/gs; $text =~ s/\r/\n/gs; $text =~ s/^(.*?\n)\n(.*)/$2/s; - my $httpHeader = $1; - my $contentType = ''; - if( $httpHeader =~ /content\-type\:\s*([^\n]*)/ois ) { - $contentType = $1; - } - if( $contentType =~ /^text\/html/ ) { - $path =~ s/[#?].*$//; - $host = 'http://'.$host; - if( $port != 80 ) { - $host .= ":$port"; - } -# $text = $TWiki::Plugins::SESSION->_cleanupIncludedHTML( $text, $host, $path, $disableremoveheaders, $disableremovescript, $disableremovebody, $disablecompresstags, $disablerewriteurls ) unless $theRaw; - } elsif( $contentType =~ /^text\/(plain|css)/ ) { - # do nothing - } else { - #bad content - } } catch Error with { + my $e = shift->stringify(); + TWiki::Func::writeWarning("$pluginName - $e"); }; return $text; } +=pod + +---++ getUrl() -> $text + +Local wrapper for different interfaces in TWiki<4.0, TWiki-4.0 and TWiki-4.1 +This would not be necessary if there was a TWiki::Func::getUrl() API + +=cut + + +sub getUrl { + my ($protocol, $host, $port, $path, $user, $pass) = @_; + + # TWiki 01 Sep 2004 and older + return TWiki::Net::getUrl($host, $port, $path, $user, $pass) + if $TWiki::Plugins::VERSION < 1.1; + + # TWiki 4.0 + return $TWiki::Plugins::SESSION->{net}->getUrl($host, $port, $path, $user, $pass) + if $TWiki::Plugins::VERSION < 1.11; + + # TWiki 4.1 + return $TWiki::Plugins::SESSION->{net}->getUrl($protocol, $host, $port, $path, $user, $pass); +} + 1; |