|
From: <jgr...@us...> - 2003-05-14 09:12:48
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv17805/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
Implemented magnet paging
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.143
retrieving revision 1.144
diff -C2 -d -r1.143 -r1.144
*** Bayes.pm 14 May 2003 03:20:03 -0000 1.143
--- Bayes.pm 14 May 2003 09:12:10 -0000 1.144
***************
*** 81,84 ****
--- 81,85 ----
# The magnets that cause attraction to certain buckets
$self->{magnets__} = {};
+ $self->{magnet_count__} = 0;
# The unclassified cutoff probability
***************
*** 306,313 ****
my $c = 0;
! $self->{matrix__} = {};
! $self->{total__} = {};
! $self->{magnets__} = {};
! $self->{full_total__} = 0;
my @buckets = glob $self->config_( 'corpus' ) . '/*';
--- 307,314 ----
my $c = 0;
! $self->{matrix__} = {};
! $self->{total__} = {};
! $self->{magnets__} = {};
! $self->{full_total__} = 0;
my @buckets = glob $self->config_( 'corpus' ) . '/*';
***************
*** 404,408 ****
close PARAMS;
} else {
! $self->write_parameters();
}
--- 405,409 ----
close PARAMS;
} else {
! $self->write_parameters();
}
***************
*** 474,477 ****
--- 475,503 ----
close WORDS;
}
+
+ $self->calculate_magnet_count__();
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # calculate_magnet_count__
+ #
+ # Count the number of magnets currently defined and store it in the magnet_count__
+ # variable for retrieval through magnet_count()
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub calculate_magnet_count__
+ {
+ my ($self) = @_;
+
+ $self->{magnet_count__} = 0;
+
+ for my $bucket (keys %{$self->{total__}}) {
+ for my $type (keys %{$self->{magnets__}{$bucket}}) {
+ for my $from (keys %{$self->{magnets__}{$bucket}{$type}}) {
+ $self->{magnet_count__} += 1;
+ }
+ }
+ }
}
***************
*** 538,542 ****
$noattype =~ s/[@\$]/\./g;
! for my $magnet (sort keys %{$self->{magnets__}{$bucket}{$type}}) {
my $regex;
--- 564,568 ----
$noattype =~ s/[@\$]/\./g;
! for my $magnet (sort keys %{$self->{magnets__}{$bucket}{$type}}) {
my $regex;
***************
*** 1522,1525 ****
--- 1548,1552 ----
$self->{magnets__}{$bucket}{$type}{$text} = 1;
$self->save_magnets__();
+ $self->calculate_magnet_count__();
}
***************
*** 1558,1561 ****
--- 1585,1589 ----
delete $self->{magnets__}{$bucket}{$type}{$text};
$self->save_magnets__();
+ $self->calculate_magnet_count__();
}
***************
*** 1617,1620 ****
--- 1645,1655 ----
$self->{scores__} = $value if (defined $value);
return $self->{scores__};
+ }
+
+ sub magnet_count
+ {
+ my ( $self ) = @_;
+
+ return $self->{magnet_count__};
}
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.127
retrieving revision 1.128
diff -C2 -d -r1.127 -r1.128
*** MailParse.pm 11 May 2003 23:24:36 -0000 1.127
--- MailParse.pm 14 May 2003 09:12:10 -0000 1.128
***************
*** 893,897 ****
print ">>> $line" if $self->{debug};
!
if ($self->{color__}) {
--- 893,897 ----
print ">>> $line" if $self->{debug};
!
if ($self->{color__}) {
***************
*** 899,911 ****
$colorized .= $self->{ut__};
$self->{ut__} = '';
! }
!
$self->{ut__} .= splitline($line, $encoding);
}
if ($self->{in_headers__}) {
!
# temporary colorization while in headers is handled within parse_header
!
$self->{ut__} = '';
--- 899,911 ----
$colorized .= $self->{ut__};
$self->{ut__} = '';
! }
!
$self->{ut__} .= splitline($line, $encoding);
}
if ($self->{in_headers__}) {
!
# temporary colorization while in headers is handled within parse_header
!
$self->{ut__} = '';
***************
*** 913,924 ****
if ( $line =~ /^(\r\n|\r|\n)/) {
!
! # Parse the last header
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding);
# Clear the saved headers
$header = '';
$argument = '';
!
$self->{ut__} .= splitline("\015\012", 0);
--- 913,924 ----
if ( $line =~ /^(\r\n|\r|\n)/) {
!
! # Parse the last header
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding);
# Clear the saved headers
$header = '';
$argument = '';
!
$self->{ut__} .= splitline("\015\012", 0);
***************
*** 935,940 ****
# Parse the last header
!
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding) if ($header ne '');
# Save the new information for the current header
--- 935,940 ----
# Parse the last header
!
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding) if ($header ne '');
# Save the new information for the current header
***************
*** 946,950 ****
# Append to argument if the next line begins with whitespace (isn't a new header)
!
if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) {
$argument .= "\015\012" . $1;
--- 946,950 ----
# Append to argument if the next line begins with whitespace (isn't a new header)
!
if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) {
$argument .= "\015\012" . $1;
***************
*** 1047,1051 ****
$colorized .= clear_out_base64( $self );
close MSG;
!
$self->{in_html_tag__} = 0;
--- 1047,1051 ----
$colorized .= clear_out_base64( $self );
close MSG;
!
$self->{in_html_tag__} = 0;
***************
*** 1123,1128 ****
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
!
my $decode_it = '';
--- 1123,1128 ----
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
!
my $decode_it = '';
***************
*** 1137,1143 ****
$mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
}
! }
!
! return $mystring;
}
--- 1137,1143 ----
$mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
}
! }
!
! return $mystring;
}
***************
*** 1176,1180 ****
my $color = $self->{bayes__}->get_color( "header:$header" );
! $self->{ut__} = "<b><font color=\"$color\">$header</font></b>: " . splitline( $argument . "\015\012", $encoding );
}
--- 1176,1180 ----
my $color = $self->{bayes__}->get_color( "header:$header" );
! $self->{ut__} = "<b><font color=\"$color\">$header</font></b>: $argument\015\012";
}
***************
*** 1223,1227 ****
if ( $header =~ /^To$/i ) {
$prefix = 'to';
! $self->{to__} = $argument if ( $self->{to__} eq '' );
}
--- 1223,1227 ----
if ( $header =~ /^To$/i ) {
$prefix = 'to';
! $self->{to__} = $argument if ( $self->{to__} eq '' );
}
|