|
From: <jgr...@us...> - 2003-09-15 14:56:13
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv15349/Classifier
Modified Files:
Bayes.pm
Log Message:
BROUGHT TEST SUITE TO 100% AFTER BERKELEYDB CHANGE
HTML.pm:
Small modification to the way that showing an individual bucket works
so that when it is cleared we show the same page with a zero word
count and no words.
Bayes.pm:
Mechanism for marking a bucket as deleted even though the files in
its directory have not all disappeared. This is needed because if
a user deletes a bucket while a download is occurring, POPFile will
have a connection open to the db, which will prevent it from being
deleted. There is now a helper, delete_bucket_files__, that does the
delete and adds the special 'delete' file as needed.
clear_bucket can't rely on being able to remove table.db, for the
same reason as above, so it now empties the table in place (deleting
every word and resetting the totals to zero) instead of removing the file.
TestBayes.tst:
Tests to cover upgrading of corpus tables and fork handling, and to deal
with the fact that the corpus is no longer in flat files.
TestHTML.tst:
Have to call prefork/forked/postfork when forking. Test the new
force_fork UI elements.
TestPOP3.tst:
Have to call prefork/forked/postfork when forking.
TestProxy.tst:
Add code to test the forking/non-forking version of the proxy code.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.196
retrieving revision 1.197
diff -C2 -d -r1.196 -r1.197
*** Bayes.pm 11 Sep 2003 08:06:03 -0000 1.196
--- Bayes.pm 15 Sep 2003 14:55:36 -0000 1.197
***************
*** 480,483 ****
--- 480,491 ----
next if ( lc($bucket) ne $bucket );
+ # Look for the delete file that indicates that this bucket
+ # is no longer needed
+
+ if ( -e "$bucket/delete" ) {
+ $self->delete_bucket_files__( $bucket );
+ next;
+ }
+
my $color = '';
***************
*** 617,623 ****
# tied hash from it thus performing an automatic upgrade.
! $self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash",
-Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE;
if ( !defined( $self->get_bucket_word_count( $bucket ) ) ) {
--- 625,631 ----
# tied hash from it thus performing an automatic upgrade.
! $self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
-Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE; # PROFILE BLOCK STOP
if ( !defined( $self->get_bucket_word_count( $bucket ) ) ) {
***************
*** 632,638 ****
if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
! print "\nUpgrading bucket $bucket...";
! flush STDOUT;
! my $wc = 0;
my $first = <WORDS>;
--- 640,644 ----
if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
! my $wc = 1;
my $first = <WORDS>;
***************
*** 641,645 ****
--- 647,655 ----
print STDERR "Incompatible corpus version in $bucket\n";
close WORDS;
+ return 0;
} else {
+ print "\nUpgrading bucket $bucket...";
+ flush STDOUT;
+
while ( <WORDS> ) {
if ( $wc % 100 == 0 ) {
***************
*** 658,665 ****
}
! print "(completed $wc words)";
close WORDS;
} else {
close WORDS;
}
--- 668,676 ----
}
! print "(completed ", $wc-1, " words)";
close WORDS;
} else {
close WORDS;
+ return 0;
}
***************
*** 874,880 ****
my $certainty = ($c1-$c0 + 1) / 2;
! if ( $certainty < 0.4 ) {
! $class = 'unsure';
! }
if ($self->{wordscores__} && defined($ui) ) {
--- 885,889 ----
my $certainty = ($c1-$c0 + 1) / 2;
! $class = 'unsure' if ( $certainty < 0.4 );
if ($self->{wordscores__} && defined($ui) ) {
***************
*** 1431,1435 ****
my ( $self, $bucket ) = @_;
! return $self->{matrix__}{$bucket}{__POPFILE__TOTAL__};
}
--- 1440,1446 ----
my ( $self, $bucket ) = @_;
! my $total = $self->{matrix__}{$bucket}{__POPFILE__TOTAL__};
!
! return defined( $total )?$total:0;
}
***************
*** 1528,1532 ****
my ( $self, $bucket ) = @_;
! return $self->{colors__}{$bucket};
}
--- 1539,1545 ----
my ( $self, $bucket ) = @_;
! my $color = $self->{colors__}{$bucket};
!
! return defined( $color )?$color:'black';
}
***************
*** 1628,1634 ****
mkdir( $self->config_( 'corpus' ) . "/$bucket" );
! tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash",
-Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE;
$self->load_word_matrix_();
--- 1641,1647 ----
mkdir( $self->config_( 'corpus' ) . "/$bucket" );
! tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
-Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE; # PROFILE BLOCK STOP
$self->load_word_matrix_();
***************
*** 1652,1668 ****
}
- my $bucket_directory = $self->config_( 'corpus' ) . "/$bucket";
-
$self->close_database__();
unlink( "$bucket_directory/table.db" );
unlink( "$bucket_directory/color" );
unlink( "$bucket_directory/params" );
unlink( "$bucket_directory/magnets" );
rmdir( $bucket_directory );
! $self->load_word_matrix_();
! return 1;
}
--- 1665,1707 ----
}
$self->close_database__();
+ $self->delete_bucket_files__( $bucket );
+ $self->load_word_matrix_();
+
+ return 1;
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # delete_bucket_files__
+ #
+ # Helper that removes the files associated with a bucket
+ #
+ # $bucket The bucket to tidy up
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub delete_bucket_files__
+ {
+ my ( $self, $bucket ) = @_;
+ my $bucket_directory = $self->config_( 'corpus' ) . "/$bucket";
unlink( "$bucket_directory/table.db" );
+ unlink( "$bucket_directory/table" );
unlink( "$bucket_directory/color" );
unlink( "$bucket_directory/params" );
unlink( "$bucket_directory/magnets" );
+ unlink( "$bucket_directory/delete" );
rmdir( $bucket_directory );
! # If the bucket directory still exists then it indicates that the
! # table was open in another process. We create a special file
! # called 'delete' which if present will cause the loader to try
! # to delete the bucket
! if ( -e $bucket_directory ) {
! open DELETER, ">$bucket_directory/delete";
! print DELETER "Special file used by POPFile to indicate that this bucket is to be deleted\n";
! close DELETER;
! }
}
***************
*** 1723,1728 ****
}
- $self->load_word_matrix_();
-
return 1;
}
--- 1762,1765 ----
***************
*** 1882,1890 ****
my ( $self, $bucket ) = @_;
! my $bucket_directory = $self->config_( 'corpus' ) . "/$bucket";
! undef $self->{db__}{$bucket};
! untie %{$self->{matrix__}{$bucket}};
! unlink( "$bucket_directory/table.db" );
$self->load_word_matrix_();
--- 1919,1928 ----
my ( $self, $bucket ) = @_;
! foreach my $word (keys %{$self->{matrix__}{$bucket}}) {
! delete $self->{matrix__}{$bucket}{$word};
! }
! $self->{matrix__}{$bucket}{__POPFILE__TOTAL__} = 0;
! $self->{matrix__}{$bucket}{__POPFILE__UNIQUE__} = 0;
$self->load_word_matrix_();
|