|
From: <jgr...@us...> - 2003-10-09 18:22:15
|
Update of /cvsroot/popfile/engine
In directory sc8-pr-cvs1:/tmp/cvs-serv11608
Modified Files:
insert.pl
Log Message:
Updated to work with the new structure and to use the Classifier::Bayes API function add_messages_to_bucket
Index: insert.pl
===================================================================
RCS file: /cvsroot/popfile/engine/insert.pl,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** insert.pl 31 Jul 2003 16:32:20 -0000 1.23
--- insert.pl 9 Oct 2003 18:22:11 -0000 1.24
***************
*** 25,134 ****
use strict;
! use locale;
! use Classifier::MailParse;
!
! my %words;
!
! # ---------------------------------------------------------------------------------------------
! #
! # load_word_table
! #
! # $bucket The name of the bucket we are loading words for
! #
! # Fills the words hash with the word frequencies for word loaded from the appropriate bucket
! #
! # ---------------------------------------------------------------------------------------------
! sub load_word_table
! {
! my ($bucket) = @_;
!
! # Make sure that the bucket mentioned exists, if it doesn't the create an empty
! # directory and word table
!
! mkdir("corpus");
! mkdir("corpus/$bucket");
!
! print "Loading word table for bucket '$bucket'...\n";
!
! open WORDS, "<corpus/$bucket/table";
!
! # Each line in the word table is a word and a count
!
! while (<WORDS>) {
! if ( /__CORPUS__ __VERSION__ (\d+)/ ) {
! if ( $1 != 1 ) {
! print "Incompatible corpus version in $bucket\n";
! return;
! }
!
! next;
! }
!
! if ( /(.+) (.+)/ ) {
! $words{$1} = $2;
! }
! }
!
! close WORDS;
! }
!
! # ---------------------------------------------------------------------------------------------
! #
! # save_word_table
! #
! # $bucket The name of the bucket we are loading words for
! #
! # Writes the words hash out to a bucket
! #
! # ---------------------------------------------------------------------------------------------
!
! sub save_word_table
! {
! my ($bucket) = @_;
!
! print "Saving word table for bucket '$bucket'...\n";
!
! open WORDS, ">corpus/$bucket/table";
! print WORDS "__CORPUS__ __VERSION__ 1\n";
!
! # Each line in the word table is a word and a count
! foreach my $word (keys %words) {
! print WORDS "$word $words{$word}\n";
! }
! close WORDS;
! }
! # ---------------------------------------------------------------------------------------------
! #
! # split_mail_message
! #
! # $message The name of the file containing the mail message
! #
! # Splits the mail message into valid words and updated the words hash
! #
! # ---------------------------------------------------------------------------------------------
! sub split_mail_message
! {
! my ($message) = @_;
! my $parser = new Classifier::MailParse;
! my $word;
! print "Parsing message '$message'...\n";
! $parser->parse_file($message);
! foreach $word (keys %{$parser->{words__}}) {
! $words{$word} += $parser->{words__}{$word};
! }
! }
! # main
! if ( $#ARGV >= 1 )
! {
! load_word_table($ARGV[0]);
my @files;
--- 25,62 ----
use strict;
! use Classifier::Bayes;
! use POPFile::Configuration;
! use POPFile::MQ;
! use POPFile::Logger;
! if ( $#ARGV > 0 ) {
! my $c = new POPFile::Configuration;
! my $mq = new POPFile::MQ;
! my $l = new POPFile::Logger;
! my $b = new Classifier::Bayes;
! $c->configuration( $c );
! $c->mq( $mq );
! $c->logger( $l );
! $l->configuration( $c );
! $l->mq( $mq );
! $l->logger( $l );
! $l->initialize();
! $mq->configuration( $c );
! $mq->mq( $mq );
! $mq->logger( $l );
! $b->configuration( $c );
! $b->mq( $mq );
! $b->logger( $l );
! $b->initialize();
! $c->load_configuration();
! $b->start();
my @files;
***************
*** 140,150 ****
}
! foreach my $file (@files) {
! split_mail_message($file);
}
-
- save_word_table($ARGV[0]);
-
- print "done.\n";
} else {
print "insert.pl - insert mail messages into a specific bucket\n\n";
--- 68,76 ----
}
! if ( !$b->add_messages_to_bucket( $ARGV[0], @files ) ) {
! print "Bucket $ARGV[0] does not exist\n";
! } else {
! print "Added ", $#files+1, " files to $ARGV[0]\n";
}
} else {
print "insert.pl - insert mail messages into a specific bucket\n\n";
***************
*** 153,154 ****
--- 79,81 ----
print " <messages> Filename of message(s) to insert\n";
}
+
|