|
From: <jgr...@us...> - 2003-04-20 21:12:39
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv28528
Modified Files:
Bayes.pm
Log Message:
Merge biljir that improves the display of probabilities for an individual message
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.131
retrieving revision 1.132
diff -C2 -d -r1.131 -r1.132
*** Bayes.pm 20 Apr 2003 21:09:06 -0000 1.131
--- Bayes.pm 20 Apr 2003 21:12:33 -0000 1.132
***************
*** 235,238 ****
--- 235,253 ----
}
+ # ---------------------------------------------------------------------------------------------
+ #
+ # get_sort_value_ behaves the same as get_value_, except that it returns not_likely__ rather
+ # than 0 if the word is not found. This makes its result more suitable as a sort key for bucket
+ # ranking.
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub get_sort_value_
+ {
+ my ($self, $bucket, $word) = @_;
+ my $v = get_value_($self, $bucket, $word);
+ return $self->{not_likely__} if $v == 0;
+ return $v;
+ }
+
sub set_value_
{
***************
*** 545,551 ****
--- 560,568 ----
my %score;
+ my %matchcount;
for my $bucket (@buckets) {
$score{$bucket} = $self->{bucket_start__}{$bucket};
+ $matchcount{$bucket} = 0;
}
***************
*** 575,578 ****
--- 592,596 ----
my $probability = get_value_( $self, $bucket, $word );
+ $matchcount{$bucket} += $self->{parser__}{words__}{$word} if ($probability != 0);
$probability = $self->{not_likely__} if ( $probability == 0 );
$wmax = $probability if ( $wmax < $probability );
***************
*** 646,650 ****
$self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">$language{Probability}</th></tr>\n";
foreach my $b (@ranking) {
--- 664,668 ----
$self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">$language{Count} </th><th scope=\"col\">$language{Probability}</th></tr>\n";
foreach my $b (@ranking) {
***************
*** 658,662 ****
}
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
}
--- 676,680 ----
}
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td align=\"right\">$matchcount{$b} </td>\n<td>$probstr</td>\n</tr>\n";
}
***************
*** 665,669 ****
$self->{scores__} .= "<tr>\n<th scope=\"col\">$language{Word}</th><th> </th><th scope=\"col\">$language{Count}</th><th> </th>\n";
! foreach my $bucket (@buckets) {
my $bucketcolor = $self->get_bucket_color( $bucket );
$self->{scores__} .= "<th><font color=\"$bucketcolor\">$bucket</font></th><th> </th>";
--- 683,688 ----
$self->{scores__} .= "<tr>\n<th scope=\"col\">$language{Word}</th><th> </th><th scope=\"col\">$language{Count}</th><th> </th>\n";
! foreach my $ix (0..(@buckets > 7? 7: @buckets)) {
! my $bucket = $ranking[$ix];
my $bucketcolor = $self->get_bucket_color( $bucket );
$self->{scores__} .= "<th><font color=\"$bucketcolor\">$bucket</font></th><th> </th>";
***************
*** 672,681 ****
$self->{scores__} .= "</tr>";
! my @ranked_words = sort {$self->get_value_( $ranking[0], $b ) <=> $self->get_value_( $ranking[0], $a )} keys %{$self->{parser__}->{words__}};
foreach my $word (@ranked_words) {
my $known = 0;
! foreach my $bucket (@buckets) {
if ( $self->get_value_( $bucket, $word ) != 0 ) {
$known = 1;
--- 691,700 ----
$self->{scores__} .= "</tr>";
! my @ranked_words = sort {$self->get_sort_value_( $ranking[0], $b ) <=> $self->get_sort_value_( $ranking[0], $a )} keys %{$self->{parser__}->{words__}};
foreach my $word (@ranked_words) {
my $known = 0;
! foreach my $bucket (@ranking) {
if ( $self->get_value_( $bucket, $word ) != 0 ) {
$known = 1;
***************
*** 692,700 ****
my $base_probability = $self->get_value_( $ranking[0], $word );
! foreach my $bucket (@buckets) {
my $probability = get_value_( $self, $bucket, $word );
my $color = 'black';
! if ( $probability >= $base_probability ) {
$color = $self->get_bucket_color( $bucket );
}
--- 711,720 ----
my $base_probability = $self->get_value_( $ranking[0], $word );
! foreach my $ix (0..(@buckets > 7? 7: @buckets)) {
! my $bucket = $ranking[$ix];
my $probability = get_value_( $self, $bucket, $word );
my $color = 'black';
! if ( $probability >= $base_probability || $base_probability == 0 ) {
$color = $self->get_bucket_color( $bucket );
}
|