From: <jgr...@us...> - 2003-05-14 09:12:48
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv17805/Classifier Modified Files: Bayes.pm MailParse.pm Log Message: Implemented magnet paging Index: Bayes.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v retrieving revision 1.143 retrieving revision 1.144 diff -C2 -d -r1.143 -r1.144 *** Bayes.pm 14 May 2003 03:20:03 -0000 1.143 --- Bayes.pm 14 May 2003 09:12:10 -0000 1.144 *************** *** 81,84 **** --- 81,85 ---- # The magnets that cause attraction to certain buckets $self->{magnets__} = {}; + $self->{magnet_count__} = 0; # The unclassified cutoff probability *************** *** 306,313 **** my $c = 0; ! $self->{matrix__} = {}; ! $self->{total__} = {}; ! $self->{magnets__} = {}; ! $self->{full_total__} = 0; my @buckets = glob $self->config_( 'corpus' ) . '/*'; --- 307,314 ---- my $c = 0; ! $self->{matrix__} = {}; ! $self->{total__} = {}; ! $self->{magnets__} = {}; ! $self->{full_total__} = 0; my @buckets = glob $self->config_( 'corpus' ) . '/*'; *************** *** 404,408 **** close PARAMS; } else { ! $self->write_parameters(); } --- 405,409 ---- close PARAMS; } else { ! $self->write_parameters(); } *************** *** 474,477 **** --- 475,503 ---- close WORDS; } + + $self->calculate_magnet_count__(); + } + + # --------------------------------------------------------------------------------------------- + # + # calculate_magnet_count__ + # + # Count the number of magnets currently defined and store in the magnet_count__ + # variable for retrievable through magnet_count() + # + # --------------------------------------------------------------------------------------------- + sub calculate_magnet_count__ + { + my ($self) = @_; + + $self->{magnet_count__} = 0; + + for my $bucket (keys %{$self->{total__}}) { + for my $type (keys %{$self->{magnets__}{$bucket}}) { + for my $from (keys %{$self->{magnets__}{$bucket}{$type}}) { + $self->{magnet_count__} += 1; + } + } + } } *************** *** 538,542 **** $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets__}{$bucket}{$type}}) { my $regex; --- 564,568 ---- $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets__}{$bucket}{$type}}) { my $regex; *************** *** 1522,1525 **** --- 1548,1552 ---- $self->{magnets__}{$bucket}{$type}{$text} = 1; $self->save_magnets__(); + $self->calculate_magnet_count__(); } *************** *** 1558,1561 **** --- 1585,1589 ---- delete $self->{magnets__}{$bucket}{$type}{$text}; $self->save_magnets__(); + $self->calculate_magnet_count__(); } *************** *** 1617,1620 **** --- 1645,1655 ---- $self->{scores__} = $value if (defined $value); return $self->{scores__}; + } + + sub magnet_count + { + my ( $self ) = @_; + + return $self->{magnet_count__}; } Index: MailParse.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v retrieving revision 1.127 retrieving revision 1.128 diff -C2 -d -r1.127 -r1.128 *** MailParse.pm 11 May 2003 23:24:36 -0000 1.127 --- MailParse.pm 14 May 2003 09:12:10 -0000 1.128 *************** *** 893,897 **** print ">>> $line" if $self->{debug}; ! if ($self->{color__}) { --- 893,897 ---- print ">>> $line" if $self->{debug}; ! if ($self->{color__}) { *************** *** 899,911 **** $colorized .= $self->{ut__}; $self->{ut__} = ''; ! } ! $self->{ut__} .= splitline($line, $encoding); } if ($self->{in_headers__}) { ! # temporary colorization while in headers is handled within parse_header ! $self->{ut__} = ''; --- 899,911 ---- $colorized .= $self->{ut__}; $self->{ut__} = ''; ! } ! $self->{ut__} .= splitline($line, $encoding); } if ($self->{in_headers__}) { ! # temporary colorization while in headers is handled within parse_header ! $self->{ut__} = ''; *************** *** 913,924 **** if ( $line =~ /^(\r\n|\r|\n)/) { ! ! # Parse the last header ! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding); # Clear the saved headers $header = ''; $argument = ''; ! $self->{ut__} .= splitline("\015\012", 0); --- 913,924 ---- if ( $line =~ /^(\r\n|\r|\n)/) { ! ! # Parse the last header ! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding); # Clear the saved headers $header = ''; $argument = ''; ! $self->{ut__} .= splitline("\015\012", 0); *************** *** 935,940 **** # Parse the last header ! ! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding) if ($header ne ''); # Save the new information for the current header --- 935,940 ---- # Parse the last header ! ! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding) if ($header ne ''); # Save the new information for the current header *************** *** 946,950 **** # Append to argument if the next line begins with whitespace (isn't a new header) ! if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) { $argument .= "\015\012" . $1; --- 946,950 ---- # Append to argument if the next line begins with whitespace (isn't a new header) ! if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) { $argument .= "\015\012" . $1; *************** *** 1047,1051 **** $colorized .= clear_out_base64( $self ); close MSG; ! $self->{in_html_tag__} = 0; --- 1047,1051 ---- $colorized .= clear_out_base64( $self ); close MSG; ! $self->{in_html_tag__} = 0; *************** *** 1123,1128 **** # the original string with it later. Thus, this subroutine returns the real decoded result. ! my ( $self, $mystring ) = @_; ! my $decode_it = ''; --- 1123,1128 ---- # the original string with it later. Thus, this subroutine returns the real decoded result. ! my ( $self, $mystring ) = @_; ! my $decode_it = ''; *************** *** 1137,1143 **** $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i; } ! } ! ! return $mystring; } --- 1137,1143 ---- $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i; } ! } ! ! return $mystring; } *************** *** 1176,1180 **** my $color = $self->{bayes__}->get_color( "header:$header" ); ! $self->{ut__} = "<b><font color=\"$color\">$header</font></b>: " . splitline( $argument . "\015\012", $encoding ); } --- 1176,1180 ---- my $color = $self->{bayes__}->get_color( "header:$header" ); ! $self->{ut__} = "<b><font color=\"$color\">$header</font></b>: $argument\015\012"; } *************** *** 1223,1227 **** if ( $header =~ /^To$/i ) { $prefix = 'to'; ! $self->{to__} = $argument if ( $self->{to__} eq '' ); } --- 1223,1227 ---- if ( $header =~ /^To$/i ) { $prefix = 'to'; ! $self->{to__} = $argument if ( $self->{to__} eq '' ); } |