|
From: <jgr...@us...> - 2003-09-19 20:18:56
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv8332/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
MAKE POPFILE BE A TRUE EXE ON WINDOWS
ADDED ATTACHMENT PSEUDOWORDS
Makefile:
Added rules for making a variety of POPFile executables:
popfile.exe Loader that picks one of the following four to run:
popfileif.exe Run POPFile in foreground with a tray icon
popfileib.exe Run POPFile in background with a tray icon
popfilef.exe Run POPFile in the foreground
popfileb.exe Run POPFile in the background
Each executable is built with either PerlTray (for those with system
tray icons) or PerlApp (for those with no icon).
popfile-tray.pl:
Equivalent of popfile.pl but with hooks for use by PerlTray for
showing the icon and handling the popup menu.
popfile-windows.pl:
This is the small wrapper that becomes popfile.exe and decides which
version of POPFile to load.
languages/English.msg:
Added the message for the console output option and to warn the user
that the change isn't going to take effect until the next restart.
Platform/MSWin32.pm:
This now contains only the code for handling the windows_trayicon
and windows_console options.
Classifier/Bayes.pm:
Removed the second blessing of ourselves because it was not needed.
Classifier/MailParse.pm:
Added pseudowords for attachment names and attachment extensions via a
patch that was submitted.
POPFile/Loader.pm:
New public function module_config() allows a user of the loader to access
configuration items without knowing the internals of Loader.
popfilew.pl:
Removed. This file is replaced by popfile-windows.pl.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.197
retrieving revision 1.198
diff -C2 -d -r1.197 -r1.198
*** Bayes.pm 15 Sep 2003 14:55:36 -0000 1.197
--- Bayes.pm 19 Sep 2003 20:18:52 -0000 1.198
***************
*** 120,124 ****
$self->name( 'bayes' );
! return bless $self, $type;
}
--- 120,124 ----
$self->name( 'bayes' );
! return $self;
}
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.160
retrieving revision 1.161
diff -C2 -d -r1.160 -r1.161
*** MailParse.pm 11 Sep 2003 08:03:20 -0000 1.160
--- MailParse.pm 19 Sep 2003 20:18:52 -0000 1.161
***************
*** 82,85 ****
--- 82,87 ----
"|samp|small|span|strike|strong|sub|sup|tt|u|var"; # PROFILE BLOCK STOP
+ my $eol = "\015\012";
+
#----------------------------------------------------------------------------
# new
***************
*** 1212,1218 ****
}
! # If we have an email header then just keep the part after the :
! if ( $line =~ /^([A-Za-z-]+):[ \t]*([^\n\r]*)/ ) {
# Parse the last header
--- 1214,1227 ----
}
! # Append to argument if the next line begins with whitespace (isn't a new header)
! if ( $line =~ /^([\t ].+)([^\r\n]+)/ ) {
! $self->{argument__} .= "$eol$1$2";
! next;
! }
!
! # If we have an email header then split it into the header and its argument
!
! if ( $line =~ /^([A-Za-z\-]+):[ \t]*([^\n\r]*)/ ) {
# Parse the last header
***************
*** 1227,1235 ****
}
- # Append to argument if the next line begins with whitespace (isn't a new header)
-
- if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) {
- $self->{argument__} .= "\015\012" . $1;
- }
next;
}
--- 1236,1239 ----
***************
*** 1412,1416 ****
}
-
# ---------------------------------------------------------------------------------------------
#
--- 1416,1419 ----
***************
*** 1542,1546 ****
}
}
! return ($mime, $encoding);
}
--- 1545,1554 ----
}
}
!
! if ( $argument =~ /name=\"(.*)\"/i ) {
! $self->add_attachment_filename( $1 );
! }
!
! return ( $mime, $encoding );
}
***************
*** 1565,1568 ****
--- 1573,1581 ----
$argument = $self->decode_string($argument) unless ($header =~ /^(Received|Content\-Type|Content\-Disposition)$/i);
+ if ( $header =~ /^Content-Disposition$/i ) {
+ $self->handle_disposition( $argument );
+ return ( $mime, $encoding );
+ }
+
add_line( $self, $argument, 0, $prefix );
***************
*** 1570,1573 ****
--- 1583,1670 ----
}
+ # ---------------------------------------------------------------------------------------------
+ #
+ # match_attachment_filename - Matches a line like 'attachment; filename="<filename>"
+ #
+ # $line The line to match
+ # Returns The first match (= "attchment" if found)
+ # The second match (= name of the file if found)
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub match_attachment_filename
+ {
+ my ( $self, $line ) = @_;
+
+ $line =~ /\s*(.*);\s*filename=\"(.*)\"/;
+
+ return ( $1, $2 );
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # file_extension - Splits a filename into name and extension
+ #
+ # $filename The filename to split
+ # Returns The name of the file
+ # The extension of the file
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub file_extension
+ {
+ my ( $self, $filename ) = @_;
+
+ $filename =~ s/(.*)\.(.*)$//;
+
+ if ( length( $1 ) > 0 ) {
+ return ( $1, $2 );
+ } else {
+ return ( $filename, "" );
+ }
+ }
+ # ---------------------------------------------------------------------------------------------
+ #
+ # add_attachment_filename - Adds a file name and extension as pseudo words attchment_name
+ # and attachment_ext
+ #
+ # $filename The filename to add to the list of words
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub add_attachment_filename
+ {
+ my ( $self, $filename ) = @_;
+
+ if ( length( $filename ) > 0) {
+ print "Add filename $filename\n" if $self->{debug};
+
+ my ( $name, $ext ) = $self->file_extension( $filename );
+
+ if ( length( $name ) > 0) {
+ $self->update_pseudoword( 'mimename', $name, 0, $name );
+ }
+
+ if ( length( $ext ) > 0 ) {
+ $self->update_pseudoword( 'mimeextension', $ext, 0, $ext );
+ }
+ }
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # handle_disposition - Parses Content-Disposition header to extract filename.
+ # If filename found, at the file name and extension to the word list
+ #
+ # $params The parameters of the Content-Disposition header
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub handle_disposition
+ {
+ my ( $self, $params ) = @_;
+
+ my ( $attachment, $filename ) = $self->match_attachment_filename( $params );
+
+ if ( $attachment eq 'attachment' ) {
+ $self->add_attachment_filename( $filename ) ;
+ }
+ }
# ---------------------------------------------------------------------------------------------
***************
*** 1579,1583 ****
#
# ---------------------------------------------------------------------------------------------
-
sub splitline
{
--- 1676,1679 ----
|