|
From: <jgr...@us...> - 2003-03-21 23:10:36
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv13478/Classifier
Modified Files:
Bayes.pm
Log Message:
Major alteration to the top 20 words display so that it now shows the full matrix of words that were used to make a decision about an email; removed the old top 20 code and all the complex calculations it did, and replaced it with a simple dump of the matrix for that message
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.116
retrieving revision 1.117
diff -C2 -d -r1.116 -r1.117
*** Bayes.pm 6 Mar 2003 22:08:13 -0000 1.116
--- Bayes.pm 21 Mar 2003 23:10:32 -0000 1.117
***************
*** 533,539 ****
my %score;
- my %wordprob;
- my %wtprob;
- my %wbprob;
for my $bucket (@buckets) {
--- 533,536 ----
***************
*** 562,569 ****
foreach my $word (keys %{$self->{parser__}->{words__}}) {
my $wmax = -10000;
- if ($self->{wordscores__}) {
- $wtprob{$word} = 0;
- $wbprob{$word} = {};
- }
foreach my $bucket (@buckets) {
--- 559,562 ----
***************
*** 577,584 ****
$score{$bucket} += ( $probability * $self->{parser__}{words__}{$word} );
- if ($self->{wordscores__}) {
- $wtprob{$word} += exp($probability);
- $wbprob{$word}{$bucket} = exp($probability);
- }
}
--- 570,573 ----
***************
*** 588,592 ****
$correction += $wmax * $self->{parser__}{words__}{$word};
}
- $wordprob{$word} = exp($wmax);
}
--- 577,580 ----
***************
*** 594,601 ****
my @ranking = sort {$score{$b} <=> $score{$a}} keys %score;
! my @wordrank;
! if ($self->{wordscores__}) {
! @wordrank = sort {($wordprob{$b} / $wtprob{$b}) <=> ($wordprob{$a} / $wtprob{$a})} keys %wordprob;
! }
my %raw_score;
--- 582,586 ----
my @ranking = sort {$score{$b} <=> $score{$a}} keys %score;
!
my %raw_score;
***************
*** 619,622 ****
--- 604,608 ----
my $prob = exp($score{$b})/$total;
my $probstr;
+
if ($prob >= 0.1 || $prob == 0.0) {
$probstr = sprintf("%12.6f", $prob);
***************
*** 624,652 ****
$probstr = sprintf("%17.6e", $prob);
}
$self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
}
$self->{scores__} .= "</table>";
if ($self->{wordscores__}) {
$self->{scores__} .= "<table class=\"top20Words\">\n<tr><td colspan=\"4\"> </td></tr>\n";
! $self->{scores__} .= "<tr>\n<th scope=\"col\">Word</th><th scope=\"col\">Prob</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">\n<font color=\"$self->{colors__}{$ranking[0]}\">$ranking[0]</font>\n</th>\n</tr>\n";
! my $wi = 0;
! foreach my $word (@wordrank) {
! if ( $wi < 20 && $wordprob{$word} / $wtprob{$word} >= 0.25 ) {
! my $wordstr = $word;
! my $long = $wordstr;
! if ( length($wordstr)>14 ) {
! $wordstr =~ /(.{12})/;
! $wordstr = "$1...";
}
- my $wordcolor = get_color($self, $word);
- my $wordprobstr = sprintf("%12.4f", $wordprob{$word} / $wtprob{$word});
- my $otherprobstr = sprintf("%12.4f", $wbprob{$word}{$ranking[0]} / $wtprob{$word});
- $self->{scores__} .= "<tr>\n<td><font color=\"$wordcolor\"><a title=\"$long\">$wordstr</a></font></td>\n";
- $self->{scores__} .= "<td><font color=\"$wordcolor\">$wordprobstr</font></td>\n<td> </td>\n";
- $self->{scores__} .= "<td><font color=\"$self->{colors__}{$ranking[0]}\">$otherprobstr</font></td>\n</tr>\n";
}
! $wi += 1;
}
--- 610,669 ----
$probstr = sprintf("%17.6e", $prob);
}
+
$self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
}
+
$self->{scores__} .= "</table>";
if ($self->{wordscores__}) {
$self->{scores__} .= "<table class=\"top20Words\">\n<tr><td colspan=\"4\"> </td></tr>\n";
! $self->{scores__} .= "<tr>\n<th scope=\"col\">Word</th><th> </th><th scope=\"col\">Count</th><th> </th>\n";
!
! foreach my $bucket (@buckets) {
! my $bucketcolor = $self->get_bucket_color( $bucket );
! $self->{scores__} .= "<th><font color=\"$bucketcolor\">$bucket</font></th><th> </th>";
! }
!
! $self->{scores__} .= "</tr>";
!
! my @ranked_words = sort {$self->get_value_( $ranking[0], $b ) <=> $self->get_value_( $ranking[0], $a )} keys %{$self->{parser__}->{words__}};
!
! foreach my $word (@ranked_words) {
! my $known = 0;
!
! foreach my $bucket (@buckets) {
! if ( $self->get_value_( $bucket, $word ) != 0 ) {
! $known = 1;
! last;
}
}
!
! if ( $known == 1 ) {
! my $wordcolor = $self->get_color( $word );
! my $count = $self->{parser__}->{words__}{$word};
!
! $self->{scores__} .= "<tr>\n<td><font color=\"$wordcolor\">$word</font></td><td> </td><td>$count</td><td> </td>\n";
!
! my $base_probability = $self->get_value_( $ranking[0], $word );
!
! foreach my $bucket (@buckets) {
! my $probability = get_value_( $self, $bucket, $word );
! my $color = 'black';
!
! if ( $probability >= $base_probability ) {
! $color = $self->get_bucket_color( $bucket );
! }
!
! if ( $probability != 0 ) {
! my $wordprobstr = sprintf("%12.4f", exp($probability) );
!
! $self->{scores__} .= "<td><font color=\"$color\">$wordprobstr</font></td>\n<td> </td>\n";
! } else {
! $self->{scores__} .= "<td> </td>\n<td> </td>\n";
! }
! }
! }
!
! $self->{scores__} .= "</tr>";
}
***************
*** 661,668 ****
}
- if ( $self->{wordscores__} ) {
- $self->{scores__} .= "<p>(<b>$class</b>)</p>";
- }
-
return $class;
}
--- 678,681 ----
***************
*** 1042,1046 ****
$self->{parser__}->{bayes__} = bless $self;
my $result = $self->{parser__}->parse_stream($file);
! $self->{parser__}->{color__} = 0;
return $result;
--- 1055,1059 ----
$self->{parser__}->{bayes__} = bless $self;
my $result = $self->{parser__}->parse_stream($file);
! $self->{parser__}->{color__} = 0;
return $result;
***************
*** 1065,1070 ****
if ( open NEW, '>' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
! print NEW "\n";
! close NEW;
}
--- 1078,1083 ----
if ( open NEW, '>' . $self->config_( 'corpus' ) . "/$bucket/table" ) {
! print NEW "\n";
! close NEW;
}
|