|
From: <jgr...@us...> - 2003-10-09 13:53:21
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv11665/Classifier
Modified Files:
Bayes.pm
Log Message:
Classifier/Bayes.pm:
Factor the tying and untying of the BerkeleyDB hash into
helper functions tie_bucket__ and untie_bucket__ to remove
duplicated code.
When doing a bucket upgrade, untie and retie the BerkeleyDB
hash to ensure that the database has been closed between
creation and verification.
When tying to an empty hash, fix a bug where the TOTAL and
UNIQUE scores were not getting correctly initialized.
Make create_bucket use the tie_bucket__ and untie_bucket__
functions which fixes an odd situation where a db gets
tied twice.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.206
retrieving revision 1.207
diff -C2 -d -r1.206 -r1.207
*** Bayes.pm 8 Oct 2003 20:16:56 -0000 1.206
--- Bayes.pm 9 Oct 2003 13:53:13 -0000 1.207
***************
*** 297,304 ****
for my $bucket (keys %{$self->{matrix__}}) {
! undef $self->{db__}{$bucket};
! delete $self->{db__}{$bucket};
! untie %{$self->{matrix__}{$bucket}};
! delete $self->{matrix__}{$bucket};
}
}
--- 297,301 ----
for my $bucket (keys %{$self->{matrix__}}) {
! $self->untie_bucket__( $bucket );
}
}
***************
*** 547,550 ****
--- 544,591 ----
# ---------------------------------------------------------------------------------------------
#
+ # tie_bucket__
+ #
+ # Ties an individual bucket (creating it if necessary) to a BerkeleyDB file called
+ # table.db. This function has the side effect of creating entries in $self->{db__}
+ # and $self->{matrix__} for the bucket, and zeroes the TOTAL and UNIQUE counts if no TOTAL is set.
+ #
+ # $bucket The bucket name
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub tie_bucket__
+ {
+ my ( $self, $bucket ) = @_;
+
+ $self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
+ -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
+ -Flags => DB_CREATE; # PROFILE BLOCK STOP
+
+ if ( !defined( $self->{matrix__}{$bucket}{__POPFILE__TOTAL__} ) ) { # no total stored yet: seed both counters
+ $self->{matrix__}{$bucket}{__POPFILE__TOTAL__} = 0;
+ $self->{matrix__}{$bucket}{__POPFILE__UNIQUE__} = 0;
+ }
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # untie_bucket__
+ #
+ # Unties the matrix__ hash from the BerkeleyDB and deletes the bucket's db__ and matrix__ entries
+ #
+ # $bucket The bucket name
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub untie_bucket__
+ {
+ my ( $self, $bucket ) = @_;
+
+ undef $self->{db__}{$bucket}; # release the saved tie object before untie (see perltie, "The untie Gotcha")
+ delete $self->{db__}{$bucket};
+ untie %{$self->{matrix__}{$bucket}};
+ delete $self->{matrix__}{$bucket}; # forget the bucket's in-memory hash as well
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
# load_bucket_
#
***************
*** 628,638 ****
# tied hash from it thus performing an automatic upgrade.
! $self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
! -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE; # PROFILE BLOCK STOP
!
! if ( !defined( $self->get_bucket_word_count( $bucket ) ) ) {
! $self->{matrix__}{$bucket}{__POPFILE__TOTAL__} = 0;
! }
if ( -e $self->config_( 'corpus' ) . "/$bucket/table" ) {
--- 669,673 ----
# tied hash from it thus performing an automatic upgrade.
! $self->tie_bucket__( $bucket );
if ( -e $self->config_( 'corpus' ) . "/$bucket/table" ) {
***************
*** 679,682 ****
--- 714,720 ----
}
+ $self->untie_bucket__( $bucket );
+ $self->tie_bucket__( $bucket );
+
if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
my $wc = 1;
***************
*** 729,736 ****
if ( $upgrade_failed ) {
! undef $self->{db__}{$bucket};
! delete $self->{db__}{$bucket};
! untie %{$self->{matrix__}{$bucket}};
! delete $self->{matrix__}{$bucket};
unlink( $self->config_( 'corpus' ) . "/$bucket/table.db" );
return 0;
--- 767,771 ----
if ( $upgrade_failed ) {
! $self->untie_bucket__( $bucket );
unlink( $self->config_( 'corpus' ) . "/$bucket/table.db" );
return 0;
***************
*** 1842,1849 ****
mkdir( $self->config_( 'corpus' ) . "/$bucket" );
! tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash", # PROFILE BLOCK START
! -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE; # PROFILE BLOCK STOP
!
$self->load_word_matrix_();
}
--- 1877,1882 ----
mkdir( $self->config_( 'corpus' ) . "/$bucket" );
! $self->tie_bucket__( $bucket );
! $self->untie_bucket__( $bucket );
$self->load_word_matrix_();
}
|