|
From: <jgr...@us...> - 2003-10-08 20:17:02
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv15110/Classifier
Modified Files:
Bayes.pm
Log Message:
Added code to verify the database after an upgrade and warn the user of any fault; for each bucket it checks every word count, as well as the bucket's total and unique word counts, before proceeding.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.205
retrieving revision 1.206
diff -C2 -d -r1.205 -r1.206
*** Bayes.pm 8 Oct 2003 13:39:15 -0000 1.205
--- Bayes.pm 8 Oct 2003 20:16:56 -0000 1.206
***************
*** 677,685 ****
return 0;
}
! unlink( $self->config_( 'corpus' ) . "/$bucket/table" );
! $self->{full_total__} = $ft;
! }
}
--- 677,750 ----
return 0;
}
+ }
! if ( open WORDS, '<' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
! my $wc = 1;
! my $bucket_total = 0;
! my $bucket_unique = 0;
! my $upgrade_failed = 0;
! my $first = <WORDS>;
! if ( defined( $first ) && ( $first =~ s/^__CORPUS__ __VERSION__ (\d+)// ) ) {
! if ( $1 != $self->{corpus_version__} ) {
! print STDERR "Incompatible corpus version in $bucket\n";
! close WORDS;
! return 0;
! } else {
! print "\nVerifying successful bucket upgrade of $bucket...";
! flush STDOUT;
!
! while ( <WORDS> ) {
! if ( $wc % 100 == 0 ) {
! print "$wc ";
! flush STDOUT;
! }
! $wc += 1;
! s/[\r\n]//g;
!
! if ( /^([^\s]+) (\d+)$/ ) {
! if ( $self->get_base_value_( $bucket, $1 ) != $2 ) {
! print "\nUpgrade error for word $1 in bucket $bucket.\nShutdown POPFile and rerun.\n";
! $upgrade_failed = 1;
! last;
! }
! $bucket_total += $2;
! $bucket_unique += 1;
! } else {
! $self->log_( "Found entry in corpus for $bucket that looks wrong: \"$_\" (ignoring)" );
! }
! }
! }
!
! close WORDS;
!
! if ( $bucket_total != $self->get_bucket_word_count( $bucket ) ) {
! print "\nUpgrade error bucket $bucket word count is incorrect.\nShutdown POPFile and rerun.\n";
! $upgrade_failed = 1;
! }
! if ( $bucket_unique != $self->get_bucket_unique_count( $bucket ) ) {
! print "\nUpgrade error bucket $bucket unique count is incorrect.\nShutdown POPFile and rerun.\n";
! $upgrade_failed = 1;
! }
!
! if ( $upgrade_failed ) {
! undef $self->{db__}{$bucket};
! delete $self->{db__}{$bucket};
! untie %{$self->{matrix__}{$bucket}};
! delete $self->{matrix__}{$bucket};
! unlink( $self->config_( 'corpus' ) . "/$bucket/table.db" );
! return 0;
! }
!
! print "(successfully verified ", $wc-1, " words)";
! } else {
! close WORDS;
! return 0;
! }
! }
!
! unlink( $self->config_( 'corpus' ) . "/$bucket/table" );
!
! $self->{full_total__} = $ft;
}
|