|
From: <jgr...@us...> - 2003-04-27 00:12:03
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv22470/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
Added new pseudowords html:imgwidthX and html:imgheightY (where X and Y are the values of an img tag's width and height attributes), used to catch web bugs
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.138
retrieving revision 1.139
diff -C2 -d -r1.138 -r1.139
*** Bayes.pm 26 Apr 2003 09:43:20 -0000 1.138
--- Bayes.pm 27 Apr 2003 00:11:57 -0000 1.139
***************
*** 1287,1291 ****
if ( /([^\s]+) (\d+)/ ) {
! my $word = $self->{mangler__}->mangle($1,1);
my $value = $2;
$value =~ s/[\r\n]//g;
--- 1287,1291 ----
if ( /([^\s]+) (\d+)/ ) {
! my $word = $1;
my $value = $2;
$value =~ s/[\r\n]//g;
***************
*** 1345,1349 ****
if ( /([^\s]+) (\d+)/ ) {
! my $word = $self->{mangler__}->mangle($1,1);
my $value = $2;
$value =~ s/[\r\n]//g;
--- 1345,1349 ----
if ( /([^\s]+) (\d+)/ ) {
! my $word = $1;
my $value = $2;
$value =~ s/[\r\n]//g;
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.116
retrieving revision 1.117
diff -C2 -d -r1.116 -r1.117
*** MailParse.pm 25 Apr 2003 07:02:06 -0000 1.116
--- MailParse.pm 27 Apr 2003 00:11:57 -0000 1.117
***************
*** 424,433 ****
}
} else {
# Anything that isn't a mailto is probably an URL
!
$self->add_url($value, $encoded, $quote, $end_quote, '');
}
!
! next;
}
--- 424,434 ----
}
} else {
+
# Anything that isn't a mailto is probably an URL
!
$self->add_url($value, $encoded, $quote, $end_quote, '');
}
!
! next;
}
***************
*** 455,458 ****
--- 456,460 ----
# Tags with colors in them
+
if ( ( $attribute =~ /^color$/i ) && ( $tag =~ /^font$/i ) ) {
update_word( $self, $value, $encoded, $quote, $end_quote, '' );
***************
*** 467,471 ****
--- 469,481 ----
}
+ # The width and height of images
+
+ if ( ( $attribute =~ /^(width|height)$/i ) && ( $tag =~ /^img$/i ) ) {
+ $attribute = lc( $attribute );
+ $self->update_pseudoword( 'html', "img$attribute$value" );
+ }
+
# Font sizes
+
if ( ( $attribute =~ /^size$/i ) && ( $tag =~ /^font$/i ) ) {
$self->update_pseudoword( 'html', "fontsize$value" );
|