Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv25861
Modified Files:
Bayes.pm
Log Message:
Remove log caching in the matrix. Discussed in patch:
[ 704112 ] Improve performance of traintest
Thanks to biljir for the initial patch contribution.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.121
retrieving revision 1.122
diff -C2 -d -r1.121 -r1.122
*** Bayes.pm 8 Apr 2003 04:20:10 -0000 1.121
--- Bayes.pm 12 Apr 2003 07:31:23 -0000 1.122
***************
*** 137,140 ****
--- 137,142 ----
}
+ $self->{unclassified__} = log($self->{unclassified__});
+
$self->load_word_matrix_();
***************
*** 224,234 ****
if ( defined($self->{matrix__}{$bucket}[$i]) ) {
- return $1 if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E L([\-\.\d]+)\|/ ) != 0 );
- }
-
- if ( defined($self->{matrix__}{$bucket}[$i]) ) {
if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
! my $newvalue = log($1 / $self->{total__}{$bucket});
! set_value_( $self, $bucket, $word, "L$newvalue" );
return $newvalue;
}
--- 226,231 ----
if ( defined($self->{matrix__}{$bucket}[$i]) ) {
if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
! my $newvalue = log($1/$self->{total__}{$bucket});
return $newvalue;
}
***************
*** 244,251 ****
if ( $word ne '' ) {
$word =~ /^(.)/;
! my $i = ord($1);
$self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
! $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E (L?[\-\.\d]+)\|/\|$word $value\|/ ) == 0 );
}
}
--- 241,248 ----
if ( $word ne '' ) {
$word =~ /^(.)/;
! my $i = ord($1);
$self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
! $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E [\-\.\d]+\|/\|$word $value\|/ ) == 0 );
}
}
***************
*** 263,271 ****
if ( $self->{full_total__} > 0 ) {
! $self->{not_likely__} = log( 1 / ( 10 * $self->{full_total__} ) );
foreach my $bucket (keys %{$self->{total__}}) {
if ( $self->{total__}{$bucket} != 0 ) {
! $self->{bucket_start__}{$bucket} = log($self->{total__}{$bucket} / $self->{full_total__});
} else {
$self->{bucket_start__}{$bucket} = 0;
--- 260,271 ----
if ( $self->{full_total__} > 0 ) {
!
! # ln(10) =~ 2.30258509299404568401799145468436
!
! $self->{not_likely__} = -log( $self->{full_total__} ) - 2.30258509299404568401799145468436;
foreach my $bucket (keys %{$self->{total__}}) {
if ( $self->{total__}{$bucket} != 0 ) {
! $self->{bucket_start__}{$bucket} = log( $self->{total__}{$bucket} / $self->{full_total__} );
} else {
$self->{bucket_start__}{$bucket} = 0;
***************
*** 595,599 ****
$raw_score{$b} = $score{$b};
$score{$b} -= $base_score;
! $total += exp($score{$b}) if ($score{$b} > 54 * log(0.5));
}
--- 595,602 ----
$raw_score{$b} = $score{$b};
$score{$b} -= $base_score;
!
! # ln(2) =~ 0.693147180559945309417232121458177
!
! $total += exp($score{$b}) if ($score{$b} > ( -54 * 0.693147180559945309417232121458177 ) );
}
***************
*** 673,678 ****
# If no bucket has a probability better than 0.5, call the message "unclassified".
my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > log($self->{unclassified__} * $total) ) ) {
$class = $ranking[0];
}
--- 676,681 ----
# If no bucket has a probability better than 0.5, call the message "unclassified".
my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > $self->{unclassified__} + log($total) ) ) {
$class = $ranking[0];
}
|