From: <ssc...@us...> - 2003-04-12 07:31:27
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv25861

Modified Files:
    Bayes.pm
Log Message:
remove log caching in matrix. Discussed in patch:
[ 704112 ] Improve performance of traintest

Thanks to biljir for initial patch contribution

Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.121
retrieving revision 1.122
diff -C2 -d -r1.121 -r1.122
*** Bayes.pm 8 Apr 2003 04:20:10 -0000 1.121
--- Bayes.pm 12 Apr 2003 07:31:23 -0000 1.122
***************
*** 137,140 ****
--- 137,142 ----
  }
+ $self->{unclassified__} = log($self->{unclassified__});
+
  $self->load_word_matrix_();
***************
*** 224,234 ****
  if ( defined($self->{matrix__}{$bucket}[$i]) ) {
-     return $1 if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E L([\-\.\d]+)\|/ ) != 0 );
- }
-
- if ( defined($self->{matrix__}{$bucket}[$i]) ) {
      if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
!         my $newvalue = log($1 / $self->{total__}{$bucket});
!         set_value_( $self, $bucket, $word, "L$newvalue" );
          return $newvalue;
      }
--- 226,231 ----
  if ( defined($self->{matrix__}{$bucket}[$i]) ) {
      if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
!         my $newvalue = log($1/$self->{total__}{$bucket});
          return $newvalue;
      }
***************
*** 244,251 ****
      if ( $word ne '' ) {
          $word =~ /^(.)/;
!         my $i = ord($1);
          $self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
!         $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E (L?[\-\.\d]+)\|/\|$word $value\|/ ) == 0 );
      }
  }
--- 241,248 ----
      if ( $word ne '' ) {
          $word =~ /^(.)/;
!         my $i = ord($1);
          $self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
!         $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E [\-\.\d]+\|/\|$word $value\|/ ) == 0 );
      }
  }
***************
*** 263,271 ****
  if ( $self->{full_total__} > 0 ) {
!     $self->{not_likely__} = log( 1 / ( 10 * $self->{full_total__} ) );
      foreach my $bucket (keys %{$self->{total__}}) {
          if ( $self->{total__}{$bucket} != 0 ) {
!             $self->{bucket_start__}{$bucket} = log($self->{total__}{$bucket} / $self->{full_total__});
          } else {
              $self->{bucket_start__}{$bucket} = 0;
--- 260,271 ----
  if ( $self->{full_total__} > 0 ) {
!
!     # ln(10) =~ 2.30258509299404568401799145468436
!
!     $self->{not_likely__} = -log( $self->{full_total__} ) - 2.30258509299404568401799145468436;
      foreach my $bucket (keys %{$self->{total__}}) {
          if ( $self->{total__}{$bucket} != 0 ) {
!             $self->{bucket_start__}{$bucket} = log( $self->{total__}{$bucket} / $self->{full_total__} );
          } else {
              $self->{bucket_start__}{$bucket} = 0;
***************
*** 595,599 ****
      $raw_score{$b} = $score{$b};
      $score{$b} -= $base_score;
!     $total += exp($score{$b}) if ($score{$b} > 54 * log(0.5));
  }
--- 595,602 ----
      $raw_score{$b} = $score{$b};
      $score{$b} -= $base_score;
!
!     # ln(2) =~ 0.693147180559945309417232121458177
!
!     $total += exp($score{$b}) if ($score{$b} > ( -54 * 0.693147180559945309417232121458177 ) );
  }
***************
*** 673,678 ****
  # If no bucket has a probability better than 0.5, call the message "unclassified".
  my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > log($self->{unclassified__} * $total) ) ) {
      $class = $ranking[0];
  }
--- 676,681 ----
  # If no bucket has a probability better than 0.5, call the message "unclassified".
  my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > $self->{unclassified__} + log($total) ) ) {
      $class = $ranking[0];
  }
|