|
From: <jgr...@us...> - 2003-06-14 21:10:15
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv29263/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
Added a new message queuing engine for asynchronous message passing and used it for classification data and UI registrations. Removed the mcount and ecount global variables. Added access to all configuration options on the Advanced page.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.149
retrieving revision 1.150
diff -C2 -d -r1.149 -r1.150
*** Bayes.pm 9 Jun 2003 18:33:10 -0000 1.149
--- Bayes.pm 14 Jun 2003 21:10:12 -0000 1.150
***************
*** 127,130 ****
--- 127,135 ----
$self->config_( 'hostname', $self->{hostname__} );
+ # We want to hear about classification events so that we can
+ # update statistics
+
+ $self->mq_register_( 'CLASS', $self );
+
return 1;
}
***************
*** 168,171 ****
--- 173,196 ----
# ---------------------------------------------------------------------------------------------
#
+ # deliver
+ #
+ # Called by the message queue to deliver a message
+ #
+ # There is no return value from this method
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub deliver
+ {
+     my ( $self, $type, $message, $parameter ) = @_;
+
+     # Only CLASS messages are acted on.  For those, $message is used as
+     # the bucket name: its 'count' parameter is read, incremented by one,
+     # stored back and the parameters are then written out
+
+     if ( $type eq 'CLASS' ) {
+         my $count = $self->get_bucket_parameter( $message, 'count' );
+
+         $self->set_bucket_parameter( $message, 'count', $count + 1 );
+         $self->write_parameters();
+     }
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
# write_parameters
#
***************
*** 243,246 ****
--- 268,284 ----
}
+ # ---------------------------------------------------------------------------------------------
+ #
+ # set_value_
+ #
+ # $bucket    The name of the bucket to store the value in
+ # $word      The word the value belongs to (nothing is stored for the empty string)
+ # $value     The value to record for the word
+ #
+ # Each bucket in matrix__ is an array indexed by the character code of a word's first
+ # character; every element is a string of "|word value|" entries.  An existing entry for
+ # the word is overwritten, otherwise a new entry is appended
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub set_value_
+ {
+     my ($self, $bucket, $word, $value) = @_;
+
+     if ( $word ne '' ) {
+
+         # ord() returns the code of the first character of its argument, so
+         # no regex capture is needed.  This also avoids indexing with a stale
+         # $1 when the word starts with a newline, which /^(.)/ cannot match
+
+         my $i = ord( $word );
+
+         $self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
+
+         # The substitution replaces an existing entry in place and yields the
+         # number of replacements made; when that is 0 the word is not yet
+         # present and the entry is appended instead
+
+         $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E [\-\.\d]+\|/\|$word $value\|/ ) == 0 );
+     }
+ }
+
# ---------------------------------------------------------------------------------------------
#
***************
*** 252,271 ****
sub get_sort_value_
{
! my ($self, $bucket, $word) = @_;
! my $v = get_value_($self, $bucket, $word);
! return $self->{not_likely__} if $v == 0;
! return $v;
! }
!
! sub set_value_
! {
! my ($self, $bucket, $word, $value) = @_;
! if ( $word ne '' ) {
! $word =~ /^(.)/;
! my $i = ord($1);
! $self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
! $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E [\-\.\d]+\|/\|$word $value\|/ ) == 0 );
}
}
--- 290,301 ----
sub get_sort_value_
{
! my ( $self, $bucket, $word ) = @_;
! my $v = $self->get_value_( $bucket, $word );  # value held for $word in $bucket
! if ( $v == 0 ) {  # a value of 0 is never returned as-is; presumably it marks a word with no entry - confirm against get_value_
! return $self->{not_likely__};  # substitute the not_likely__ value for 0
! } else {
! return $v;  # any non-zero value is returned unchanged
}
}
***************
*** 381,385 ****
#
# ---------------------------------------------------------------------------------------------
-
sub load_bucket_
{
--- 411,414 ----
***************
*** 679,685 ****
$self->{scores__} .= "<input type=\"hidden\" name=\"count\" value=\"" . ($mlen + 1) . "\" />";
$self->{scores__} .= "<hr><b>$language{QuickMagnets}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Magnet}</th>\n<th>$language{Magnet_Always}</th>\n";
!
my %types = get_magnet_types();
!
foreach my $type ( keys %types ) {
--- 708,714 ----
$self->{scores__} .= "<input type=\"hidden\" name=\"count\" value=\"" . ($mlen + 1) . "\" />";
$self->{scores__} .= "<hr><b>$language{QuickMagnets}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Magnet}</th>\n<th>$language{Magnet_Always}</th>\n";
!
my %types = get_magnet_types();
!
foreach my $type ( keys %types ) {
***************
*** 687,709 ****
{
$i += 1;
!
!
$self->{scores__} .= "<tr><td scope=\"col\">$type: ";
$self->{scores__} .= "<select name=\"text$i\" id=\"\">\n";
!
foreach my $magnet ( 0 .. $#{$qm{$type}} ) {
$self->{scores__} .= "<option>" . Classifier::MailParse::splitline(@{$qm{$type}}[$magnet], 0) . "</option>\n";
}
! $self->{scores__} .= "</select>\n";
$self->{scores__} .= "</td><td>";
! $self->{scores__} .= "<input type=\"hidden\" name=\"type$i\" id=\"magnetsAddType\" value=\"$type\"/>";
$self->{scores__} .= "<select name=\"bucket$i\" id=\"magnetsAddBucket\">\n<option value=\"\"></option>\n";
!
foreach my $bucket (@buckets) {
$self->{scores__} .= "<option value=\"$bucket\">$bucket</option>\n";
}
!
$self->{scores__} .= "</select></td></tr>";
! }
}
--- 716,737 ----
{
$i += 1;
!
$self->{scores__} .= "<tr><td scope=\"col\">$type: ";
$self->{scores__} .= "<select name=\"text$i\" id=\"\">\n";
!
foreach my $magnet ( 0 .. $#{$qm{$type}} ) {
$self->{scores__} .= "<option>" . Classifier::MailParse::splitline(@{$qm{$type}}[$magnet], 0) . "</option>\n";
}
! $self->{scores__} .= "</select>\n";
$self->{scores__} .= "</td><td>";
! $self->{scores__} .= "<input type=\"hidden\" name=\"type$i\" id=\"magnetsAddType\" value=\"$type\"/>";
$self->{scores__} .= "<select name=\"bucket$i\" id=\"magnetsAddBucket\">\n<option value=\"\"></option>\n";
!
foreach my $bucket (@buckets) {
$self->{scores__} .= "<option value=\"$bucket\">$bucket</option>\n";
}
!
$self->{scores__} .= "</select></td></tr>";
! }
}
***************
*** 899,903 ****
if ( $line =~ /(^[ \t])|([:])/ ) {
if ( $msg_subject eq '' ) {
! $msg_head_before .= $msg_head_q . $line;
} else {
$msg_head_after .= $msg_head_q . $line;
--- 927,931 ----
if ( $line =~ /(^[ \t])|([:])/ ) {
if ( $msg_subject eq '' ) {
! $msg_head_before .= $msg_head_q . $line;
} else {
$msg_head_after .= $msg_head_q . $line;
***************
*** 906,914 ****
} else {
# Gather up any lines that are questionable
!
! $msg_head_q .= $line;
}
-
-
}
} else {
--- 934,940 ----
} else {
# Gather up any lines that are questionable
!
! $msg_head_q .= $line;
}
}
} else {
***************
*** 1058,1062 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_buckets
{
--- 1084,1087 ----
***************
*** 1075,1079 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_bucket_word_count
{
--- 1100,1103 ----
***************
*** 1093,1097 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_bucket_word_list
{
--- 1117,1120 ----
***************
*** 1112,1116 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_word_count
{
--- 1135,1138 ----
***************
*** 1130,1134 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_count_for_word
{
--- 1152,1155 ----
***************
*** 1136,1140 ****
my $value = $self->get_value_( $bucket, $word );
!
return int( exp( $value ) * $self->get_bucket_word_count( $bucket ) + 0.5 );
}
--- 1157,1161 ----
my $value = $self->get_value_( $bucket, $word );
!
return int( exp( $value ) * $self->get_bucket_word_count( $bucket ) + 0.5 );
}
***************
*** 1149,1153 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_bucket_unique_count
{
--- 1170,1173 ----
***************
*** 1166,1170 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_bucket_color
{
--- 1186,1189 ----
***************
*** 1184,1188 ****
#
# ---------------------------------------------------------------------------------------------
-
sub set_bucket_color
{
--- 1203,1206 ----
***************
*** 1202,1206 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_bucket_parameter
{
--- 1220,1223 ----
***************
*** 1227,1231 ****
#
# ---------------------------------------------------------------------------------------------
-
sub set_bucket_parameter
{
--- 1244,1247 ----
***************
*** 1246,1250 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_html_colored_message
{
--- 1262,1265 ----
***************
*** 1269,1273 ****
#
# ---------------------------------------------------------------------------------------------
-
sub create_bucket
{
--- 1284,1287 ----
***************
*** 1294,1298 ****
#
# ---------------------------------------------------------------------------------------------
-
sub delete_bucket
{
--- 1308,1311 ----
***************
*** 1320,1324 ****
#
# ---------------------------------------------------------------------------------------------
-
sub rename_bucket
{
--- 1333,1336 ----
***************
*** 1340,1344 ****
#
# ---------------------------------------------------------------------------------------------
-
sub add_message_to_bucket
{
--- 1352,1355 ----
***************
*** 1399,1403 ****
#
# ---------------------------------------------------------------------------------------------
-
sub remove_message_from_bucket
{
--- 1410,1413 ----
***************
*** 1458,1462 ****
#
# ---------------------------------------------------------------------------------------------
-
sub echo_to_dot_
{
--- 1468,1471 ----
***************
*** 1466,1470 ****
# Check for an abort
last if ( $self->{alive_} == 0 );
!
print $client $_ if ( defined( $client ) );
--- 1475,1479 ----
# Check for an abort
last if ( $self->{alive_} == 0 );
!
print $client $_ if ( defined( $client ) );
***************
*** 1483,1487 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_buckets_with_magnets
{
--- 1492,1495 ----
***************
*** 1500,1504 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_magnet_types_in_bucket
{
--- 1508,1511 ----
***************
*** 1517,1521 ****
#
# ---------------------------------------------------------------------------------------------
-
sub clear_bucket
{
--- 1524,1527 ----
***************
*** 1536,1540 ****
#
# ---------------------------------------------------------------------------------------------
-
sub clear_magnets
{
--- 1542,1545 ----
***************
*** 1554,1558 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_magnets
{
--- 1559,1562 ----
***************
*** 1573,1577 ****
#
# ---------------------------------------------------------------------------------------------
-
sub create_magnet
{
--- 1577,1580 ----
***************
*** 1590,1594 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_magnet_types
{
--- 1593,1596 ----
***************
*** 1610,1614 ****
#
# ---------------------------------------------------------------------------------------------
-
sub delete_magnet
{
--- 1612,1615 ----
***************
*** 1627,1631 ****
#
# ---------------------------------------------------------------------------------------------
-
sub get_stopword_list
{
--- 1628,1631 ----
***************
*** 1646,1650 ****
#
# ---------------------------------------------------------------------------------------------
-
sub add_stopword
{
--- 1646,1649 ----
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.132
retrieving revision 1.133
diff -C2 -d -r1.132 -r1.133
*** MailParse.pm 9 Jun 2003 18:33:36 -0000 1.132
--- MailParse.pm 14 Jun 2003 21:10:12 -0000 1.133
***************
*** 75,82 ****
# This is a mapping between HTML color names and HTML hexadecimal color values used by the
# map_color value to get canonical color values
- #
- # TODO: expand this table to be more complete
! $self->{color_map__} = { 'white', 'ffffff', 'black', '000000', 'red', 'ff0000', 'green', '00ff00', 'blue', '0000ff' };
$self->{content_type__} = '';
--- 75,104 ----
# This is a mapping between HTML color names and HTML hexadecimal color values used by the
# map_color value to get canonical color values
! # Keys are lower case HTML color names, values are the matching six digit
! # lower case hexadecimal RGB values (without a leading #).  Every key must
! # appear only once: a repeated key silently overrides the earlier entry
! $self->{color_map__} = { 'aliceblue','f0f8ff', 'antiquewhite','faebd7', 'aqua','00ffff', 'aquamarine','7fffd4', 'azure','f0ffff',
! 'beige','f5f5dc', 'bisque','ffe4c4', 'black','000000', 'blanchedalmond','ffebcd', 'blue','0000ff', 'blueviolet','8a2be2',
! 'brown','a52a2a', 'burlywood','deb887', 'cadetblue','5f9ea0', 'chartreuse','7fff00', 'chocolate','d2691e', 'coral','ff7f50',
! 'cornflowerblue','6495ed', 'cornsilk','fff8dc', 'crimson','dc143c', 'cyan','00ffff', 'darkblue','00008b', 'darkcyan','008b8b',
! 'darkgoldenrod','b8860b', 'darkgray','a9a9a9', 'darkgreen','006400', 'darkkhaki','bdb76b', 'darkmagenta','8b008b', 'darkolivegreen','556b2f',
! 'darkorange','ff8c00', 'darkorchid','9932cc', 'darkred','8b0000', 'darksalmon','e9967a', 'darkseagreen','8fbc8f', 'darkslateblue','483d8b',
! 'darkslategray','2f4f4f', 'darkturquoise','00ced1', 'darkviolet','9400d3', 'deeppink','ff1493', 'deepskyblue','00bfff', 'dimgray','696969',
! 'dodgerblue','1e90ff', 'firebrick','b22222', 'floralwhite','fffaf0', 'forestgreen','228b22', 'fuchsia','ff00ff', 'gainsboro','dcdcdc',
! 'ghostwhite','f8f8ff', 'gold','ffd700', 'goldenrod','daa520', 'gray','808080', 'green','008000', 'greenyellow','adff2f',
! 'honeydew','f0fff0', 'hotpink','ff69b4', 'indianred','cd5c5c', 'indigo','4b0082', 'ivory','fffff0', 'khaki','f0e68c',
! 'lavender','e6e6fa', 'lavenderblush','fff0f5', 'lawngreen','7cfc00', 'lemonchiffon','fffacd', 'lightblue','add8e6',
! 'lightcoral','f08080', 'lightcyan','e0ffff', 'lightgoldenrodyellow','fafad2', 'lightgreen','90ee90', 'lightgrey','d3d3d3',
! 'lightpink','ffb6c1', 'lightsalmon','ffa07a', 'lightseagreen','20b2aa', 'lightskyblue','87cefa', 'lightslategray','778899',
! 'lightsteelblue','b0c4de', 'lightyellow','ffffe0', 'lime','00ff00', 'limegreen','32cd32', 'linen','faf0e6', 'magenta','ff00ff',
! 'maroon','800000', 'mediumaquamarine','66cdaa', 'mediumblue','0000cd', 'mediumorchid','ba55d3', 'mediumpurple','9370db',
! 'mediumseagreen','3cb371', 'mediumslateblue','7b68ee', 'mediumspringgreen','00fa9a', 'mediumturquoise','48d1cc',
! 'mediumvioletred','c71585', 'midnightblue','191970', 'mintcream','f5fffa', 'mistyrose','ffe4e1', 'moccasin','ffe4b5',
! 'navajowhite','ffdead', 'navy','000080', 'oldlace','fdf5e6', 'olive','808000', 'olivedrab','6b8e23', 'orange','ffa500',
! 'orangered','ff4500', 'orchid','da70d6', 'palegoldenrod','eee8aa', 'palegreen','98fb98', 'paleturquoise','afeeee',
! 'palevioletred','db7093', 'papayawhip','ffefd5', 'peachpuff','ffdab9', 'peru','cd853f', 'pink','ffc0cb', 'plum','dda0dd',
! 'powderblue','b0e0e6', 'purple','800080', 'red','ff0000', 'rosybrown','bc8f8f', 'royalblue','4169e1', 'saddlebrown','8b4513',
! 'salmon','fa8072', 'sandybrown','f4a460', 'seagreen','2e8b57', 'seashell','fff5ee', 'sienna','a0522d', 'silver','c0c0c0',
! 'skyblue','87ceeb', 'slateblue','6a5acd', 'slategray','708090', 'snow','fffafa', 'springgreen','00ff7f', 'steelblue','4682b4',
! 'tan','d2b48c', 'teal','008080', 'thistle','d8bfd8', 'tomato','ff6347', 'turquoise','40e0d0', 'violet','ee82ee', 'wheat','f5deb3',
! 'white','ffffff', 'whitesmoke','f5f5f5', 'yellow','ffff00', 'yellowgreen','9acd32' };
$self->{content_type__} = '';
|