From: <jgr...@us...> - 2003-04-27 00:12:03
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv22470/Classifier

Modified Files:
    Bayes.pm MailParse.pm
Log Message:
Added new pseudowords html:imgwidthX and html:imgheightY used to catch web bugs

Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.138
retrieving revision 1.139
diff -C2 -d -r1.138 -r1.139
*** Bayes.pm 26 Apr 2003 09:43:20 -0000 1.138
--- Bayes.pm 27 Apr 2003 00:11:57 -0000 1.139
***************
*** 1287,1291 ****
  if ( /([^\s]+) (\d+)/ ) {
!     my $word = $self->{mangler__}->mangle($1,1);
      my $value = $2;
      $value =~ s/[\r\n]//g;
--- 1287,1291 ----
  if ( /([^\s]+) (\d+)/ ) {
!     my $word = $1;
      my $value = $2;
      $value =~ s/[\r\n]//g;
***************
*** 1345,1349 ****
  if ( /([^\s]+) (\d+)/ ) {
!     my $word = $self->{mangler__}->mangle($1,1);
      my $value = $2;
      $value =~ s/[\r\n]//g;
--- 1345,1349 ----
  if ( /([^\s]+) (\d+)/ ) {
!     my $word = $1;
      my $value = $2;
      $value =~ s/[\r\n]//g;

Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.116
retrieving revision 1.117
diff -C2 -d -r1.116 -r1.117
*** MailParse.pm 25 Apr 2003 07:02:06 -0000 1.116
--- MailParse.pm 27 Apr 2003 00:11:57 -0000 1.117
***************
*** 424,433 ****
      }
  } else {
      # Anything that isn't a mailto is probably an URL
!     $self->add_url($value, $encoded, $quote, $end_quote, '');
  }
!
!
  next;
  }
--- 424,434 ----
      }
  } else {
+
      # Anything that isn't a mailto is probably an URL
!     $self->add_url($value, $encoded, $quote, $end_quote, '');
  }
!
!
  next;
  }
***************
*** 455,458 ****
--- 456,460 ----
  # Tags with colors in them
+
  if ( ( $attribute =~ /^color$/i ) && ( $tag =~ /^font$/i ) ) {
      update_word( $self, $value, $encoded, $quote, $end_quote, '' );
***************
*** 467,471 ****
--- 469,481 ----
  }
+ # The width and height of images
+
+ if ( ( $attribute =~ /^(width|height)$/i ) && ( $tag =~ /^img$/i ) ) {
+     $attribute = lc( $attribute );
+     $self->update_pseudoword( 'html', "img$attribute$value" );
+ }
+
  # Font sizes
+
  if ( ( $attribute =~ /^size$/i ) && ( $tag =~ /^font$/i ) ) {
      $self->update_pseudoword( 'html', "fontsize$value" );
|