|
From: <jgr...@us...> - 2003-11-10 19:55:40
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv311/Classifier
Modified Files:
WordMangle.pm Bayes.pm
Log Message:
Multi-user Phase #1
-------------------
Make POPFile work relative to two special environment variables:
POPFILE_ROOT The location where popfile.pl is installed
POPFILE_USER The location where this user's config is kept
POPFile/Module.pm:
Add two methods
get_root_path_
get_user_path_
that convert passed in relative paths to absolute paths relative
to either POPFILE_ROOT or POPFILE_USER. These two helpers simply
call the relevant public interface in POPFile/Configuration.pm.
POPFile/Configuration.pm:
Add two methods
get_root_path
get_user_path
that convert passed in relative paths to absolute paths relative
to either POPFILE_ROOT or POPFILE_USER.
Classifier/WordMangle.pm:
Make this a PLM (with name 'wordmangle') so that it can get access
to the POPFILE_USER variable so that stopwords are per user.
Classifier/Bayes.pm:
Remove unused Classifier::WordMangle object.
Change all path usage to call get_user_path_ to set the path
correctly relative to the current user.
UI/HTML.pm:
Change path usage to call get_root_path_ or get_user_path_ to get
the path relative to the current root or user. The root is used to
access skins, manual and language files. Everything else is in the
user directory.
POPFile/Module.pm:
Use the POPFILE_ROOT to control the loading of modules.
popfile.pl:
If POPFILE_ROOT is defined then add it to @INC so that we can load
the POPFile::Loader module.
tests/TestWordMangle.tst:
Since Classifier::WordMangle is now a PLM the tests need to be
updated to load the mangler correctly and link it in with the
other POPFile modules that it depends on.
tests/TestPOP3.tst
tests/TestMailParse.tst
tests/TestBayes.tst:
Since Classifier::WordMangle is now a PLM, test suites that relied
upon Classifier::MailParse creating the mangler needed updating to
actually create and pass in the mangler object.
tests/TestHTTP.tst:
Make tests work on non-Windows systems. One test was relying on
\n being \r\n.
TODO
Write tests for get_user_path and get_root_path, is_absolute_path,
root_path and path_join
Write tests for POPFILE_ROOT and POPFILE_USER
Make HTML test suite run on Linux
Debug MailParse test suite, fix Japanese handling (026).
Debug POP3 suite/TOP handling
Index: WordMangle.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/WordMangle.pm,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -d -r1.33 -r1.34
*** WordMangle.pm 4 Nov 2003 20:01:16 -0000 1.33
--- WordMangle.pm 10 Nov 2003 19:55:35 -0000 1.34
***************
*** 1,4 ****
--- 1,8 ----
+ # POPFILE LOADABLE MODULE
package Classifier::WordMangle;
+ use POPFile::Module;
+ @ISA = ("POPFile::Module");
+
# ---------------------------------------------------------------------------------------------
#
***************
*** 45,49 ****
{
my $type = shift;
! my $self;
$self->{stop__} = {};
--- 49,53 ----
{
my $type = shift;
! my $self = POPFile::Module->new();
$self->{stop__} = {};
***************
*** 51,59 ****
bless $self, $type;
! $self->load_stopwords();
return $self;
}
# ---------------------------------------------------------------------------------------------
#
--- 55,72 ----
bless $self, $type;
! $self->name( 'wordmangle' );
return $self;
}
+ sub start
+ {
+ my ( $self ) = @_;
+
+ $self->load_stopwords();
+
+ return 1;
+ }
+
# ---------------------------------------------------------------------------------------------
#
***************
*** 65,69 ****
my ($self) = @_;
! if ( open STOPS, "<stopwords" ) {
delete $self->{stop__};
while ( <STOPS> ) {
--- 78,82 ----
my ($self) = @_;
! if ( open STOPS, '<' . $self->get_user_path_( 'stopwords' ) ) {
delete $self->{stop__};
while ( <STOPS> ) {
***************
*** 80,84 ****
my ($self) = @_;
! if ( open STOPS, ">stopwords" ) {
for my $word (keys %{$self->{stop__}}) {
print STOPS "$word\n";
--- 93,97 ----
my ($self) = @_;
! if ( open STOPS, '>' . $self->get_user_path_( 'stopwords' ) ) {
for my $word (keys %{$self->{stop__}}) {
print STOPS "$word\n";
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.221
retrieving revision 1.222
diff -C2 -d -r1.221 -r1.222
*** Bayes.pm 9 Nov 2003 22:33:47 -0000 1.221
--- Bayes.pm 10 Nov 2003 19:55:35 -0000 1.222
***************
*** 35,39 ****
use locale;
use Classifier::MailParse;
- use Classifier::WordMangle;
use IO::Handle;
--- 35,38 ----
***************
*** 88,94 ****
$self->{full_total__} = 0;
- # Used to mangle the corpus when loaded
- $self->{mangler__} = new Classifier::WordMangle;
-
# Used to parse mail messages
$self->{parser__} = new Classifier::MailParse;
--- 87,90 ----
***************
*** 297,301 ****
for my $bucket (keys %{$self->{matrix__}}) {
! open PARAMS, '>' . $self->config_( 'corpus' ) . "/$bucket/params";
for my $param (keys %{$self->{parameters__}{$bucket}}) {
print PARAMS "$param $self->{parameters__}{$bucket}{$param}\n";
--- 293,297 ----
for my $bucket (keys %{$self->{matrix__}}) {
! open PARAMS, '>' . $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/params" );
for my $param (keys %{$self->{parameters__}{$bucket}}) {
print PARAMS "$param $self->{parameters__}{$bucket}{$param}\n";
***************
*** 500,504 ****
$self->{full_total__} = 0;
! my @buckets = glob $self->config_( 'corpus' ) . '/*';
foreach my $bucket (@buckets) {
--- 496,500 ----
$self->{full_total__} = 0;
! my @buckets = glob $self->get_user_path_( $self->config_( 'corpus' ) . '/*' );
foreach my $bucket (@buckets) {
***************
*** 585,589 ****
$self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
-Cachesize => $self->config_( 'db_cache_size' ),
! -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
-Flags => DB_CREATE; # PROFILE BLOCK STOP
--- 581,585 ----
$self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
-Cachesize => $self->config_( 'db_cache_size' ),
! -Filename => $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/table.db" ),
-Flags => DB_CREATE; # PROFILE BLOCK STOP
***************
*** 648,652 ****
# See if there's a color file specified
! if ( open PARAMS, '<' . $self->config_( 'corpus' ) . "/$bucket/params" ) {
while ( <PARAMS> ) {
s/[\r\n]//g;
--- 644,648 ----
# See if there's a color file specified
! if ( open PARAMS, '<' . $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/params" ) ) {
while ( <PARAMS> ) {
s/[\r\n]//g;
***************
*** 661,665 ****
# See if there are magnets defined
! if ( open MAGNETS, '<' . $self->config_( 'corpus' ) . "/$bucket/magnets" ) {
while ( <MAGNETS> ) {
s/[\r\n]//g;
--- 657,661 ----
# See if there are magnets defined
! if ( open MAGNETS, '<' . $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/magnets" ) ) {
while ( <MAGNETS> ) {
s/[\r\n]//g;
***************
*** 710,719 ****
$self->tie_bucket__( $bucket );
! if ( -e $self->config_( 'corpus' ) . "/$bucket/table" ) {
$self->log_( "Performing automatic upgrade of $bucket corpus from flat file to BerkeleyDB" );
my $ft = $self->{full_total__};
! if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
my $wc = 1;
--- 706,715 ----
$self->tie_bucket__( $bucket );
! if ( -e $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/table" ) ) {
$self->log_( "Performing automatic upgrade of $bucket corpus from flat file to BerkeleyDB" );
my $ft = $self->{full_total__};
! if ( open WORDS, '<' . $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/table" ) ) {
my $wc = 1;
***************
*** 758,762 ****
$self->tie_bucket__( $bucket );
! if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
my $wc = 1;
my $bucket_total = 0;
--- 754,758 ----
$self->tie_bucket__( $bucket );
! if ( open WORDS, '<' . $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket/table" ) ) {
my $wc = 1;
my $bucket_total = 0;
***************
*** 869,873 ****
for my $bucket (keys %{$self->{matrix__}}) {
! open MAGNET, '>' . $self->config_( 'corpus' ). "/$bucket/magnets";
for my $type (keys %{$self->{magnets__}{$bucket}}) {
--- 865,869 ----
for my $bucket (keys %{$self->{matrix__}}) {
! open MAGNET, '>' . $self->get_user_path_( $self->config_( 'corpus' ). "/$bucket/magnets" );
for my $type (keys %{$self->{magnets__}{$bucket}}) {
***************
*** 1271,1275 ****
$path = 0 if (!defined($path));
! return ($path?$self->global_config_( 'msgdir' ):'') . "popfile$dcount" . "=$mcount" . (defined $ext?$ext:'.msg');
}
--- 1267,1271 ----
$path = 0 if (!defined($path));
! return ($path?$self->get_user_path_( $self->global_config_( 'msgdir' ) ):'') . "popfile$dcount" . "=$mcount" . (defined $ext?$ext:'.msg');
}
***************
*** 1291,1295 ****
$filename =~ s/msg$/cls/;
! open CLASS, '>' . $self->global_config_( 'msgdir' ) . $filename;
if ( defined( $magnet ) && ( $magnet ne '' ) ) {
--- 1287,1291 ----
$filename =~ s/msg$/cls/;
! open CLASS, '>' . $self->get_user_path_( $self->global_config_( 'msgdir' ) . $filename );
if ( defined( $magnet ) && ( $magnet ne '' ) ) {
***************
*** 1335,1339 ****
my $magnet = '';
! if ( open CLASS, '<' . $self->global_config_( 'msgdir' ) . $filename ) {
$bucket = <CLASS>;
if ( $bucket =~ /([^ ]+) MAGNET ([^\r\n]+)/ ) {
--- 1331,1335 ----
my $magnet = '';
! if ( open CLASS, '<' . $self->get_user_path_( $self->global_config_( 'msgdir' ) . $filename ) ) {
$bucket = <CLASS>;
if ( $bucket =~ /([^ ]+) MAGNET ([^\r\n]+)/ ) {
***************
*** 1993,1997 ****
{
my ( $self, $bucket ) = @_;
! my $bucket_directory = $self->config_( 'corpus' ) . "/$bucket";
unlink( "$bucket_directory/table.db" );
--- 1989,1993 ----
{
my ( $self, $bucket ) = @_;
! my $bucket_directory = $self->get_user_path_( $self->config_( 'corpus' ) . "/$bucket" );
unlink( "$bucket_directory/table.db" );
|