From: <jgr...@us...> - 2003-04-20 21:12:39
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv28528

Modified Files:
    Bayes.pm
Log Message:
Merge biljir that improves the display of probabilities for an individual message

Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.131
retrieving revision 1.132
diff -C2 -d -r1.131 -r1.132
*** Bayes.pm 20 Apr 2003 21:09:06 -0000 1.131
--- Bayes.pm 20 Apr 2003 21:12:33 -0000 1.132
***************
*** 235,238 ****
--- 235,253 ----
  }
+ # ---------------------------------------------------------------------------------------------
+ #
+ # get_sort_value_ behaves the same as get_value_, except that it returns not_likely__ rather
+ # than 0 if the word is not found. This makes its result more suitable as a sort key for bucket
+ # ranking.
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub get_sort_value_
+ {
+ my ($self, $bucket, $word) = @_;
+ my $v = get_value_($self, $bucket, $word);
+ return $self->{not_likely__} if $v == 0;
+ return $v;
+ }
+
  sub set_value_
  {
***************
*** 545,551 ****
--- 560,568 ----
  my %score;
+ my %matchcount;
  for my $bucket (@buckets) {
  $score{$bucket} = $self->{bucket_start__}{$bucket};
+ $matchcount{$bucket} = 0;
  }
***************
*** 575,578 ****
--- 592,596 ----
  my $probability = get_value_( $self, $bucket, $word );
+ $matchcount{$bucket} += $self->{parser__}{words__}{$word} if ($probability != 0);
  $probability = $self->{not_likely__} if ( $probability == 0 );
  $wmax = $probability if ( $wmax < $probability );
***************
*** 646,650 ****
  $self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">$language{Probability}</th></tr>\n";
  foreach my $b (@ranking) {
--- 664,668 ----
  $self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">$language{Count} </th><th scope=\"col\">$language{Probability}</th></tr>\n";
  foreach my $b (@ranking) {
***************
*** 658,662 ****
  }
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
  }
--- 676,680 ----
  }
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td align=\"right\">$matchcount{$b} </td>\n<td>$probstr</td>\n</tr>\n";
  }
***************
*** 665,669 ****
  $self->{scores__} .= "<tr>\n<th scope=\"col\">$language{Word}</th><th> </th><th scope=\"col\">$language{Count}</th><th> </th>\n";
! foreach my $bucket (@buckets) {
  my $bucketcolor = $self->get_bucket_color( $bucket );
  $self->{scores__} .= "<th><font color=\"$bucketcolor\">$bucket</font></th><th> </th>";
--- 683,688 ----
  $self->{scores__} .= "<tr>\n<th scope=\"col\">$language{Word}</th><th> </th><th scope=\"col\">$language{Count}</th><th> </th>\n";
! foreach my $ix (0..(@buckets > 7? 7: @buckets)) {
! my $bucket = $ranking[$ix];
  my $bucketcolor = $self->get_bucket_color( $bucket );
  $self->{scores__} .= "<th><font color=\"$bucketcolor\">$bucket</font></th><th> </th>";
***************
*** 672,681 ****
  $self->{scores__} .= "</tr>";
! my @ranked_words = sort {$self->get_value_( $ranking[0], $b ) <=> $self->get_value_( $ranking[0], $a )} keys %{$self->{parser__}->{words__}};
  foreach my $word (@ranked_words) {
  my $known = 0;
! foreach my $bucket (@buckets) {
  if ( $self->get_value_( $bucket, $word ) != 0 ) {
  $known = 1;
--- 691,700 ----
  $self->{scores__} .= "</tr>";
! my @ranked_words = sort {$self->get_sort_value_( $ranking[0], $b ) <=> $self->get_sort_value_( $ranking[0], $a )} keys %{$self->{parser__}->{words__}};
  foreach my $word (@ranked_words) {
  my $known = 0;
! foreach my $bucket (@ranking) {
  if ( $self->get_value_( $bucket, $word ) != 0 ) {
  $known = 1;
***************
*** 692,700 ****
  my $base_probability = $self->get_value_( $ranking[0], $word );
! foreach my $bucket (@buckets) {
  my $probability = get_value_( $self, $bucket, $word );
  my $color = 'black';
! if ( $probability >= $base_probability ) {
  $color = $self->get_bucket_color( $bucket );
  }
--- 711,720 ----
  my $base_probability = $self->get_value_( $ranking[0], $word );
! foreach my $ix (0..(@buckets > 7? 7: @buckets)) {
! my $bucket = $ranking[$ix];
  my $probability = get_value_( $self, $bucket, $word );
  my $color = 'black';
! if ( $probability >= $base_probability || $base_probability == 0 ) {
  $color = $self->get_bucket_color( $bucket );
  }
|