|
From: <jgr...@us...> - 2003-10-20 13:06:27
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv22773/Classifier
Modified Files:
Bayes.pm
Log Message:
Create test suites for the utility scripts:
insert.pl
bayes.pl
pipe.pl
insert.pl:
Added code to detect errors (file does not exist,
bucket does not exist), output error messages and
return an error code.
Make sure that all POPFile modules are cleaned up
when done.
bayes.pl:
Added code to detect errors (file does not exist),
output error messages and return an error code.
Make sure that all POPFile modules are cleaned up
when done.
pipe.pl:
Make sure that all POPFile modules are cleaned up
when done.
Pass in \n as the end of line character so that the
STDOUT output will have the right end of line character
for the platform pipe.pl is being used on. If this
is not done then POPFile will use the standard network
line ending: \r\n.
Classifier/Bayes.pm:
Add reference to IO::Handle since we need it for
flush.
Add an optional $crlf option to classify_and_modify so
that it can be used with non-network streams that require
an EOL different from \r\n.
If classify_and_modify is told not to save the file in
the history then there's no need to save the file even
temporarily because the streaming parsing added in v0.20.0
doesn't need a copy of the message on disk.
write_line__ might now be passed an undef file handle so
don't try to write to it.
If we are not saving the classification to the history
then don't add the XPL header (or the equivalent link
in a quarantined message) since it won't work.
POPFile/Configuration.pm:
Add code to determine whether there has been a
configuration change. This is done to prevent the
configuration being saved when nothing has changed.
This is particularly needed by the scripts which won't
change the configuration and shouldn't incur the cost
of a save.
tests/TestInsertScript.tst:
Test suite for insert.pl.
tests/TestBayesScript.tst:
Test suite for bayes.pl.
tests/TestPipeScript.tst:
Test suite for pipe.pl.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.213
retrieving revision 1.214
diff -C2 -d -r1.213 -r1.214
*** Bayes.pm 17 Oct 2003 06:12:03 -0000 1.213
--- Bayes.pm 20 Oct 2003 12:53:50 -0000 1.214
***************
*** 34,37 ****
--- 34,38 ----
use Classifier::MailParse;
use Classifier::WordMangle;
+ use IO::Handle;
# This is used to get the hostname of the current machine
***************
*** 1357,1361 ****
my ( $self, $file, $line, $class ) = @_;
! print $file $line;
if ( $class eq '' ) {
--- 1358,1362 ----
my ( $self, $file, $line, $class ) = @_;
! print $file $line if defined( $file );
if ( $class eq '' ) {
***************
*** 1379,1382 ****
--- 1380,1387 ----
# $class - if we already know the classification
# $echo - 1 to echo to the client, 0 to supress, defaults to 1
+ # $crlf - The sequence to use at the end of a line in the output, normally
+ # this is left undefined and this method uses $eol (the normal network end
+ # of line), but if this method is being used with real files you may wish
+ # to pass in \n instead
#
# Returns a classification if it worked and the name of the file where the message
***************
*** 1388,1394 ****
sub classify_and_modify
{
! my ( $self, $mail, $client, $dcount, $mcount, $nosave, $class, $echo ) = @_;
! $echo = 1 unless (defined $echo);
my $msg_subject = ''; # The message subject
--- 1393,1400 ----
sub classify_and_modify
{
! my ( $self, $mail, $client, $dcount, $mcount, $nosave, $class, $echo, $crlf ) = @_;
! $echo = 1 unless (defined $echo);
! $crlf = $eol unless (defined $crlf);
my $msg_subject = ''; # The message subject
***************
*** 1432,1436 ****
# get class file errors
! open TEMP, ">$temp_file";
while ( <$mail> ) {
--- 1438,1442 ----
# get class file errors
! open TEMP, ">$temp_file" unless $nosave;
while ( <$mail> ) {
***************
*** 1468,1472 ****
if ( !( $line =~ /^(\r\n|\r|\n)$/i ) ) {
$message_size += length $line;
! $self->write_line__( \*TEMP, $fileline, $class );
# If there is no echoing occuring, it doesn't matter what we do to these
--- 1474,1478 ----
if ( !( $line =~ /^(\r\n|\r|\n)$/i ) ) {
$message_size += length $line;
! $self->write_line__( $nosave?undef:\*TEMP, $fileline, $class );
# If there is no echoing occuring, it doesn't matter what we do to these
***************
*** 1497,1502 ****
}
} else {
! $self->write_line__( \*TEMP, "\n", $class );
! $message_size += length $eol;
$getting_headers = 0;
}
--- 1503,1508 ----
}
} else {
! $self->write_line__( $nosave?undef:\*TEMP, "\n", $class );
! $message_size += length $crlf;
$getting_headers = 0;
}
***************
*** 1504,1513 ****
$message_size += length $line;
$msg_body .= $line;
! $self->write_line__( \*TEMP, $fileline, $class );
}
# Check to see if too much time has passed and we need to keep the mail client happy
if ( time > ( $last_timeout + 2 ) ) {
! print $client "X-POPFile-TimeoutPrevention: $timeout_count$eol" if ( $echo );
$timeout_count += 1;
$last_timeout = time;
--- 1510,1519 ----
$message_size += length $line;
$msg_body .= $line;
! $self->write_line__( $nosave?undef:\*TEMP, $fileline, $class );
}
# Check to see if too much time has passed and we need to keep the mail client happy
if ( time > ( $last_timeout + 2 ) ) {
! print $client "X-POPFile-TimeoutPrevention: $timeout_count$crlf" if ( $echo );
$timeout_count += 1;
$last_timeout = time;
***************
*** 1517,1521 ****
}
! close TEMP;
# Parse Japanese mail message with Kakasi
--- 1523,1527 ----
}
! close TEMP unless $nosave;
# Parse Japanese mail message with Kakasi
***************
*** 1549,1556 ****
$msg_head_before .= 'Subject:' . $msg_subject;
! $msg_head_before .= $eol;
# Add the XTC header
! $msg_head_after .= "X-Text-Classification: $classification$eol" if ( ( $self->global_config_( 'xtc' ) ) && # PROFILE BLOCK START
( $self->{parameters__}{$classification}{quarantine} == 0 ) ); # PROFILE BLOCK STOP
--- 1555,1562 ----
$msg_head_before .= 'Subject:' . $msg_subject;
! $msg_head_before .= $crlf;
# Add the XTC header
! $msg_head_after .= "X-Text-Classification: $classification$crlf" if ( ( $self->global_config_( 'xtc' ) ) && # PROFILE BLOCK START
( $self->{parameters__}{$classification}{quarantine} == 0 ) ); # PROFILE BLOCK STOP
***************
*** 1560,1570 ****
$xpl .= "http://";
$xpl .= $self->module_config_( 'html', 'local' )?"127.0.0.1":$self->config_( 'hostname' );
! $xpl .= ":" . $self->module_config_( 'html', 'port' ) . "/jump_to_message?view=$nopath_temp_file$eol";
! if ( $self->global_config_( 'xpl' ) && ( $self->{parameters__}{$classification}{quarantine} == 0 ) ) {
$msg_head_after .= 'X-POPFile-Link: ' . $xpl;
}
! $msg_head_after .= $msg_head_q . "$eol";
# Echo the text of the message to the client
--- 1566,1576 ----
$xpl .= "http://";
$xpl .= $self->module_config_( 'html', 'local' )?"127.0.0.1":$self->config_( 'hostname' );
! $xpl .= ":" . $self->module_config_( 'html', 'port' ) . "/jump_to_message?view=$nopath_temp_file$crlf";
! if ( $self->global_config_( 'xpl' ) && ( $self->{parameters__}{$classification}{quarantine} == 0 ) && ( !$nosave ) ) {
$msg_head_after .= 'X-POPFile-Link: ' . $xpl;
}
! $msg_head_after .= $msg_head_q . "$crlf";
# Echo the text of the message to the client
***************
*** 1577,1583 ****
if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( $self->{parameters__}{$classification}{quarantine} == 1 ) {
! print $client "From: " . $self->{parser__}->get_header( 'from' ) . "$eol";
! print $client "To: " . $self->{parser__}->get_header( 'to' ) . "$eol";
! print $client "Date: " . $self->{parser__}->get_header( 'date' ) . "$eol";
if ( $self->global_config_( 'subject' ) ) {
# Don't add the classification unless it is not present
--- 1583,1589 ----
if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( $self->{parameters__}{$classification}{quarantine} == 1 ) {
! print $client "From: " . $self->{parser__}->get_header( 'from' ) . "$crlf";
! print $client "To: " . $self->{parser__}->get_header( 'to' ) . "$crlf";
! print $client "Date: " . $self->{parser__}->get_header( 'date' ) . "$crlf";
if ( $self->global_config_( 'subject' ) ) {
# Don't add the classification unless it is not present
***************
*** 1587,1606 ****
}
}
! print $client "Subject:$msg_subject$eol";
! print $client "X-Text-Classification: $classification$eol" if ( $self->global_config_( 'xtc' ) );
! print $client 'X-POPFile-Link: ' . $xpl if ( $self->global_config_( 'xpl' ) );
! print $client "MIME-Version: 1.0$eol";
! print $client "Content-Type: multipart/report; boundary=\"$nopath_temp_file\"$eol$eol--$nopath_temp_file$eol";
! print $client "Content-Type: text/plain$eol$eol";
! print $client "POPFile has quarantined a message. It is attached to this email.$eol$eol";
! print $client "Quarantined Message Detail$eol$eol";
! print $client "Original From: " . $self->{parser__}->get_header('from') . "$eol";
! print $client "Original To: " . $self->{parser__}->get_header('to') . "$eol";
! print $client "Original Subject: " . $self->{parser__}->get_header('subject') . "$eol";
! print $client "To examine the email open the attachment. To change this mail's classification go to $xpl$eol";
! print $client "The first 20 words found in the email are:$eol$eol";
print $client $self->{parser__}->first20();
! print $client "$eol--$nopath_temp_file$eol";
! print $client "Content-Type: message/rfc822$eol$eol";
}
}
--- 1593,1614 ----
}
}
! print $client "Subject:$msg_subject$crlf";
! print $client "X-Text-Classification: $classification$crlf" if ( $self->global_config_( 'xtc' ) );
! print $client 'X-POPFile-Link: ' . $xpl if ( $self->global_config_( 'xpl' ) && !$nosave );
! print $client "MIME-Version: 1.0$crlf";
! print $client "Content-Type: multipart/report; boundary=\"$nopath_temp_file\"$crlf$crlf--$nopath_temp_file$crlf";
! print $client "Content-Type: text/plain$crlf$crlf";
! print $client "POPFile has quarantined a message. It is attached to this email.$crlf$crlf";
! print $client "Quarantined Message Detail$crlf$crlf";
! print $client "Original From: " . $self->{parser__}->get_header('from') . "$crlf";
! print $client "Original To: " . $self->{parser__}->get_header('to') . "$crlf";
! print $client "Original Subject: " . $self->{parser__}->get_header('subject') . "$crlf";
! print $client "To examine the email open the attachment. ";
! print $client "To change this mail's classification go to $xpl" unless $nosave;
! print $client "$crlf";
! print $client "The first 20 words found in the email are:$crlf$crlf";
print $client $self->{parser__}->first20();
! print $client "$crlf--$nopath_temp_file$crlf";
! print $client "Content-Type: message/rfc822$crlf$crlf";
}
}
***************
*** 1615,1624 ****
if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( ( $self->{parameters__}{$classification}{quarantine} == 1 ) && $echo ) {
! $before_dot = "$eol--$nopath_temp_file--$eol";
}
}
if ( !$got_full_body ) {
! $self->echo_to_dot_( $mail, $echo?$client:undef, '>>' . $temp_file, $before_dot );
} else {
print $client $before_dot if ( $before_dot ne '' );
--- 1623,1632 ----
if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( ( $self->{parameters__}{$classification}{quarantine} == 1 ) && $echo ) {
! $before_dot = "$crlf--$nopath_temp_file--$crlf";
}
}
if ( !$got_full_body ) {
! $self->echo_to_dot_( $mail, $echo?$client:undef, $nosave?undef:'>>' . $temp_file, $before_dot );
} else {
print $client $before_dot if ( $before_dot ne '' );
***************
*** 1626,1635 ****
if ( $echo && $got_full_body ) {
! print $client "$eol.$eol";
}
! if ( $nosave ) {
! unlink( $temp_file );
! } else {
$self->history_write_class($class_file, undef, $classification, undef, ($self->{magnet_used__}?$self->{magnet_detail__}:undef));
--- 1634,1641 ----
if ( $echo && $got_full_body ) {
! print $client "$crlf.$crlf";
}
! if ( !$nosave ) {
$self->history_write_class($class_file, undef, $classification, undef, ($self->{magnet_used__}?$self->{magnet_detail__}:undef));
|