|
From: <jgr...@us...> - 2003-06-14 21:10:15
|
Update of /cvsroot/popfile/engine
In directory sc8-pr-cvs1:/tmp/cvs-serv29263
Modified Files:
bayes.pl insert.pl popfile.pl stopwords
Log Message:
Added a new message-queuing engine for asynchronous message passing and used it for classification data and UI registrations; removed the mcount and ecount global variables; added access to all configuration options on the Advanced page.
Index: bayes.pl
===================================================================
RCS file: /cvsroot/popfile/engine/bayes.pl,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -d -r1.15 -r1.16
*** bayes.pl 18 Feb 2003 14:50:10 -0000 1.15
--- bayes.pl 14 Jun 2003 21:10:12 -0000 1.16
***************
*** 16,24 ****
{
my $b = new Classifier::Bayes;
!
if ( $b->initialize() == 0 ) {
die "Failed to start while initializing the classifier module";
}
!
$b->{debug} = 1;
$b->{parser}->{debug} = 0;
--- 16,24 ----
{
my $b = new Classifier::Bayes;
!
if ( $b->initialize() == 0 ) {
die "Failed to start while initializing the classifier module";
}
!
$b->{debug} = 1;
$b->{parser}->{debug} = 0;
***************
*** 30,34 ****
print "$file is '" . $b->classify_file($file) . "'\n";
}
!
foreach my $word (keys %{$b->{parser}->{words}}) {
print "$word $b->{parser}->{words}{$word}\n";
--- 30,34 ----
print "$file is '" . $b->classify_file($file) . "'\n";
}
!
foreach my $word (keys %{$b->{parser}->{words}}) {
print "$word $b->{parser}->{words}{$word}\n";
***************
*** 40,42 ****
print "Usage: bayes.pl <messages>\n";
print " <messages> Filename of message(s) to classify\n";
! }
\ No newline at end of file
--- 40,42 ----
print "Usage: bayes.pl <messages>\n";
print " <messages> Filename of message(s) to classify\n";
! }
Index: insert.pl
===================================================================
RCS file: /cvsroot/popfile/engine/insert.pl,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** insert.pl 22 Jan 2003 18:32:23 -0000 1.20
--- insert.pl 14 Jun 2003 21:10:12 -0000 1.21
***************
*** 26,30 ****
{
my ($bucket) = @_;
!
# Make sure that the bucket mentioned exists, if it doesn't the create an empty
# directory and word table
--- 26,30 ----
{
my ($bucket) = @_;
!
# Make sure that the bucket mentioned exists, if it doesn't the create an empty
# directory and word table
***************
*** 32,42 ****
mkdir("corpus");
mkdir("corpus/$bucket");
!
print "Loading word table for bucket '$bucket'...\n";
!
open WORDS, "<corpus/$bucket/table";
!
# Each line in the word table is a word and a count
!
while (<WORDS>) {
if ( /__CORPUS__ __VERSION__ (\d+)/ ) {
--- 32,42 ----
mkdir("corpus");
mkdir("corpus/$bucket");
!
print "Loading word table for bucket '$bucket'...\n";
!
open WORDS, "<corpus/$bucket/table";
!
# Each line in the word table is a word and a count
!
while (<WORDS>) {
if ( /__CORPUS__ __VERSION__ (\d+)/ ) {
***************
*** 45,57 ****
return;
}
!
next;
}
!
if ( /(.+) (.+)/ ) {
$words{$1} = $2;
}
}
!
close WORDS;
}
--- 45,57 ----
return;
}
!
next;
}
!
if ( /(.+) (.+)/ ) {
$words{$1} = $2;
}
}
!
close WORDS;
}
***************
*** 72,85 ****
print "Saving word table for bucket '$bucket'...\n";
!
open WORDS, ">corpus/$bucket/table";
print WORDS "__CORPUS__ __VERSION__ 1\n";
!
# Each line in the word table is a word and a count
!
foreach my $word (keys %words) {
print WORDS "$word $words{$word}\n";
}
!
close WORDS;
}
--- 72,85 ----
print "Saving word table for bucket '$bucket'...\n";
!
open WORDS, ">corpus/$bucket/table";
print WORDS "__CORPUS__ __VERSION__ 1\n";
!
# Each line in the word table is a word and a count
!
foreach my $word (keys %words) {
print WORDS "$word $words{$word}\n";
}
!
close WORDS;
}
***************
*** 104,110 ****
$parser->parse_stream($message);
!
! foreach $word (keys %{$parser->{words}}) {
! $words{$word} += $parser->{words}{$word};
}
}
--- 104,110 ----
$parser->parse_stream($message);
!
! foreach $word (keys %{$parser->{words__}}) {
! $words{$word} += $parser->{words__}{$word};
}
}
***************
*** 112,116 ****
# main
! if ( $#ARGV >= 1 )
{
load_word_table($ARGV[0]);
--- 112,116 ----
# main
! if ( $#ARGV >= 1 )
{
load_word_table($ARGV[0]);
***************
*** 123,133 ****
@files = map { glob } @ARGV[1 .. $#ARGV];
}
!
foreach my $file (@files) {
split_mail_message($file);
}
!
save_word_table($ARGV[0]);
!
print "done.\n";
} else {
--- 123,133 ----
@files = map { glob } @ARGV[1 .. $#ARGV];
}
!
foreach my $file (@files) {
split_mail_message($file);
}
!
save_word_table($ARGV[0]);
!
print "done.\n";
} else {
***************
*** 136,138 ****
print " <bucket> The name of the bucket\n";
print " <messages> Filename of message(s) to insert\n";
! }
\ No newline at end of file
--- 136,138 ----
print " <bucket> The name of the bucket\n";
print " <messages> Filename of message(s) to insert\n";
! }
Index: popfile.pl
===================================================================
RCS file: /cvsroot/popfile/engine/popfile.pl,v
retrieving revision 1.210
retrieving revision 1.211
diff -C2 -d -r1.210 -r1.211
*** popfile.pl 30 May 2003 19:39:17 -0000 1.210
--- popfile.pl 14 Jun 2003 21:10:12 -0000 1.211
***************
*** 91,96 ****
}
-
-
# ---------------------------------------------------------------------------------------------
#
--- 91,94 ----
***************
*** 236,240 ****
#
! my ( $major_version, $minor_version, $build_version ) = ( 0, 19, 0 );
my $version_string = "v$major_version.$minor_version.$build_version";
--- 234,238 ----
#
! my ( $major_version, $minor_version, $build_version ) = ( 0, 20, 0 );
my $version_string = "v$major_version.$minor_version.$build_version";
***************
*** 253,256 ****
--- 251,259 ----
$SIG{CHLD} = $on_windows?'IGNORE':\&reaper;
+ # I've seen spurious ALRM signals happen on Windows so here we for safety
+ # say that we want to ignore them
+
+ $SIG{ALRM} = 'IGNORE';
+
# Create the main objects that form the core of POPFile. Consists of the configuration
# modules, the classifier, the UI (currently HTML based), and the POP3 proxy.
***************
*** 289,307 ****
$components{$type}{$name}->configuration( $components{core}{config} );
$components{$type}{$name}->logger( $components{core}{logger} ) if ( $name ne 'logger' );
}
}
! # All proxies need access to the classifier and the interface
!
! foreach my $name (keys %{$components{proxy}}) {
! $components{proxy}{$name}->classifier( $components{classifier}{bayes} );
! $components{proxy}{$name}->ui( $components{interface}{html} );
! }
!
! # All interface components need access to the classifier and the UI
foreach my $name (keys %{$components{interface}}) {
$components{interface}{$name}->classifier( $components{classifier}{bayes} );
! $components{interface}{$name}->ui( $components{interface}{html} ) if ( $name ne 'html' );
}
--- 292,307 ----
$components{$type}{$name}->configuration( $components{core}{config} );
$components{$type}{$name}->logger( $components{core}{logger} ) if ( $name ne 'logger' );
+ $components{$type}{$name}->mq( $components{core}{mq} );
}
}
! # All interface components need access to the classifier
foreach my $name (keys %{$components{interface}}) {
$components{interface}{$name}->classifier( $components{classifier}{bayes} );
! }
!
! foreach my $name (keys %{$components{proxy}}) {
! $components{proxy}{$name}->classifier( $components{classifier}{bayes} );
}
***************
*** 399,410 ****
}
! print "\n\n Saving configuration\n";
! flush STDOUT;
!
! # Write the final configuration to disk
!
! $components{core}{config}->save_configuration();
!
! print "\nPOPFile Engine $version_string terminated\n";
# ---------------------------------------------------------------------------------------------
--- 399,403 ----
}
! print "\n\nPOPFile Engine $version_string terminated\n";
# ---------------------------------------------------------------------------------------------
Index: stopwords
===================================================================
RCS file: /cvsroot/popfile/engine/stopwords,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** stopwords 7 Apr 2003 18:01:41 -0000 1.1
--- stopwords 14 Jun 2003 21:10:12 -0000 1.2
***************
*** 1,11 ****
- you
strike
date
- form
textflow
him
pdt
- code
also
acronym
pst
--- 1,11 ----
strike
+ you
date
textflow
+ form
him
pdt
also
+ code
acronym
pst
***************
*** 14,23 ****
cgi
charset
- est
nbsp
sun
your
- title
but
and
multicol
--- 14,23 ----
cgi
charset
nbsp
+ est
sun
your
but
+ title
and
multicol
***************
*** 30,38 ****
being
dir
- jan
she
color
- have
will
received
going
--- 30,38 ----
being
dir
she
+ jan
color
will
+ have
received
going
***************
*** 40,50 ****
htm
edt
- height
- mbox
can
! iframe
dfn
! com
were
would
off
--- 40,50 ----
htm
edt
can
! mbox
! height
dfn
! iframe
were
+ com
would
off
***************
*** 67,87 ****
aug
overlay
- www
div
status
doing
tue
person
- mon
- cellspacing
his
! helo
select
esmtp
alt
- wbr
- message
- border
note
big
thu
--- 67,87 ----
aug
overlay
div
+ www
status
doing
tue
person
his
! cellspacing
! mon
select
+ helo
esmtp
alt
note
+ border
+ message
+ wbr
big
thu
***************
*** 126,165 ****
body
nobr
- html
bgcolor
from
var
- oct
her
banner
del
- blockquote
math
! any
path
spot
- cdt
textarea
! embed
the
done
yet
it's
- net
font
! thead
blink
plaintext
- went
could
does
param
- this
jul
org
- mar
- src
- mailto
for
cst
kbd
--- 126,165 ----
body
nobr
bgcolor
+ html
from
var
her
+ oct
banner
del
math
! blockquote
path
+ any
spot
textarea
! cdt
the
+ embed
done
yet
it's
font
! net
blink
+ thead
plaintext
could
+ went
does
param
jul
+ this
org
for
+ mailto
+ src
+ mar
cst
kbd
***************
*** 172,183 ****
helvetica
samp
- col
- tab
been
fig
mail
cite
- had
link
script
menu
--- 172,183 ----
helvetica
samp
been
+ tab
+ col
fig
mail
cite
link
+ had
script
menu
***************
*** 187,193 ****
ins
sep
- sub
was
! sat
frameset
apr
--- 187,193 ----
ins
sep
was
! sub
frameset
+ sat
apr
|