You can subscribe to this list here.
| 2003 | Jan | Feb (160) | Mar (119) | Apr (111) | May (118) | Jun (101) | Jul (304) | Aug (113) | Sep (140) | Oct (137) | Nov (87) | Dec (122) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 | Jan (78) | Feb (125) | Mar (131) | Apr (59) | May (121) | Jun (166) | Jul (150) | Aug (137) | Sep (73) | Oct (58) | Nov (27) | Dec (60) |
| 2005 | Jan (131) | Feb (84) | Mar (36) | Apr (8) | May (28) | Jun (20) | Jul (10) | Aug (72) | Sep (76) | Oct (34) | Nov (3) | Dec (29) |
| 2006 | Jan (13) | Feb (92) | Mar (7) | Apr (1) | May (1) | Jun (2) | Jul (4) | Aug (17) | Sep (5) | Oct (2) | Nov (8) | Dec (12) |
| 2007 | Jan (28) | Feb (15) | Mar | Apr | May (8) | Jun (4) | Jul (5) | Aug (8) | Sep (20) | Oct (38) | Nov (65) | Dec (92) |
| 2008 | Jan (21) | Feb (56) | Mar (27) | Apr (174) | May (25) | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
|
From: <jgr...@us...> - 2003-04-15 21:20:22
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv4822/Classifier
Modified Files:
Bayes.pm
Log Message:
Finished working on false positive/negative counting
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.123
retrieving revision 1.124
diff -C2 -d -r1.123 -r1.124
*** Bayes.pm 12 Apr 2003 21:16:52 -0000 1.123
--- Bayes.pm 15 Apr 2003 21:20:17 -0000 1.124
***************
*** 1069,1073 ****
my ( $self, $bucket, $parameter ) = @_;
! return $self->{parameters__}{$bucket}{$parameter};
}
--- 1069,1080 ----
my ( $self, $bucket, $parameter ) = @_;
! my $param = $self->{parameters__}{$bucket}{$parameter};
!
! if ( !defined( $param ) ) {
! $param = 0;
! }
!
! $self->log_( "get_bucket_parameter( $bucket, $parameter ) is $param ");
! return $param;
}
***************
*** 1088,1092 ****
--- 1095,1102 ----
my ( $self, $bucket, $parameter, $value ) = @_;
+ $self->log_( "set_bucket_parameter( $bucket, $parameter ) to $value ");
+
$self->{parameters__}{$bucket}{$parameter} = $value;
+ $self->write_parameters();
}
|
|
From: <jgr...@us...> - 2003-04-15 14:48:16
|
Update of /cvsroot/popfile/engine/Proxy
In directory sc8-pr-cvs1:/tmp/cvs-serv17837
Modified Files:
NNTP.pm POP3.pm SMTP.pm
Log Message:
Support new proxy_welcome_string to set the welcome string used by the child process for security reasons
Index: NNTP.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Proxy/NNTP.pm,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** NNTP.pm 25 Mar 2003 05:24:55 -0000 1.8
--- NNTP.pm 15 Apr 2003 14:48:09 -0000 1.9
***************
*** 67,70 ****
--- 67,73 ----
$self->config_( 'separator', ':');
+ # The welcome string from the proxy is configurable
+ $self->config_( 'welcome_string', "NNTP POPFile ($self->{version_}) server ready" );
+
# Tell the user interface module that we having a configuration
# item that needs a UI component
***************
*** 109,113 ****
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "201 NNTP POPFile (vTODO.TODO.TODO) server ready$eol" );
# Retrieve commands from the client and process them until the client disconnects or
--- 112,116 ----
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "201 " . $self->config_( 'welcome_string' ) . "$eol" );
# Retrieve commands from the client and process them until the client disconnects or
Index: POP3.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Proxy/POP3.pm,v
retrieving revision 1.50
retrieving revision 1.51
diff -C2 -d -r1.50 -r1.51
*** POP3.pm 25 Mar 2003 05:24:55 -0000 1.50
--- POP3.pm 15 Apr 2003 14:48:09 -0000 1.51
***************
*** 71,74 ****
--- 71,77 ----
$self->config_( 'separator', ':' );
+ # The welcome string from the proxy is configurable
+ $self->config_( 'welcome_string', "POP3 POPFile ($self->{version_}) server ready" );
+
# Tell the user interface module that we having a configuration
# item that needs a UI component
***************
*** 118,122 ****
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "+OK POP3 POPFile ($self->{version_}) server ready$eol" );
# Retrieve commands from the client and process them until the client disconnects or
--- 121,125 ----
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "+OK " . $self->config_( 'welcome_string' ) . "$eol" );
# Retrieve commands from the client and process them until the client disconnects or
Index: SMTP.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Proxy/SMTP.pm,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** SMTP.pm 25 Mar 2003 05:24:56 -0000 1.9
--- SMTP.pm 15 Apr 2003 14:48:09 -0000 1.10
***************
*** 68,71 ****
--- 68,74 ----
$self->config_( 'local', 1 );
+ # The welcome string from the proxy is configurable
+ $self->config_( 'welcome_string', "SMTP POPFile ($self->{version_}) welcome" );
+
# Tell the user interface module that we having a configuration
# item that needs a UI component
***************
*** 111,115 ****
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "220 SMTP POPFile ($self->{version_}) server ready$eol" );
# Retrieve commands from the client and process them until the client disconnects or
--- 114,118 ----
# Tell the client that we are ready for commands and identify our version number
! $self->tee_( $client, "220 " . $self->config_( 'welcome_string' ) . "$eol" );
# Retrieve commands from the client and process them until the client disconnects or
|
|
From: <jgr...@us...> - 2003-04-15 13:32:43
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv8855
Modified Files:
HTML.pm
Log Message:
Fix minor bug where column headers in History display came out in wrong order, fix some alignment, use > and < instead of + and - for sort order
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.128
retrieving revision 1.129
diff -C2 -d -r1.128 -r1.129
*** HTML.pm 15 Apr 2003 05:32:13 -0000 1.128
--- HTML.pm 15 Apr 2003 13:32:35 -0000 1.129
***************
*** 2003,2008 ****
@{$self->{history_keys__}} = keys %{$self->{history__}};
}
!
!
# If a sort is specified then use it to sort the history items by an a subkey
# (from, subject or bucket) otherwise use compare_mf to give the history back
--- 2003,2007 ----
@{$self->{history_keys__}} = keys %{$self->{history__}};
}
!
# If a sort is specified then use it to sort the history items by an a subkey
# (from, subject or bucket) otherwise use compare_mf to give the history back
***************
*** 2010,2014 ****
# field we ignore all punctuation characters so that "John and 'John and John
# all sort next to each other
!
# Ascending or Descending? Ascending is noted by /-field/
--- 2009,2013 ----
# field we ignore all punctuation characters so that "John and 'John and John
# all sort next to each other
!
# Ascending or Descending? Ascending is noted by /-field/
***************
*** 2018,2025 ****
}
! if ( $sort ne ''
# If the filter had no messages, this will be undefined
# and there are no ways to sort nothing
! && defined @{$self->{history_keys__}}) {
@{$self->{history_keys__}} = sort {
--- 2017,2026 ----
}
! if ( ( $sort ne '' ) &&
!
# If the filter had no messages, this will be undefined
# and there are no ways to sort nothing
!
! defined @{$self->{history_keys__}} ) {
@{$self->{history_keys__}} = sort {
***************
*** 2031,2034 ****
--- 2032,2036 ----
} @{$self->{history_keys__}};
} else {
+
# Here's a quick shortcut so that we don't have to iterate
# if there's no work for us to do
***************
*** 2038,2042 ****
}
}
!
@{$self->{history_keys__}} = reverse @{$self->{history_keys__}} if ($descending);
}
--- 2040,2044 ----
}
}
!
@{$self->{history_keys__}} = reverse @{$self->{history_keys__}} if ($descending);
}
***************
*** 2691,2709 ****
# History messages
$body .= "<table class=\"historyTable\" width=\"100%\" summary=\"$self->{language__}{History_MainTableSummary}\">\n";
! # column headers
!
! my %headers_table = ( '', 'ID',
! 'from', 'From',
! 'subject', 'Subject',
! 'bucket', 'Classification');
!
!
$body .= "<tr valign=\"bottom\">\n";
!
! foreach my $header (keys %headers_table) {
$body .= "<th class=\"historyLabel\" scope=\"col\">\n";
$body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=" . ($self->{form_}{sort} eq "$header"?"-":"");
$body .= "$header\">";
!
my $label = '';
if ( defined $self->{language__}{ $headers_table{$header} }) {
--- 2693,2714 ----
# History messages
$body .= "<table class=\"historyTable\" width=\"100%\" summary=\"$self->{language__}{History_MainTableSummary}\">\n";
!
! # Column headers
!
! my %headers_table = ( '', 'ID',
! 'from', 'From',
! 'subject', 'Subject',
! 'bucket', 'Classification');
!
$body .= "<tr valign=\"bottom\">\n";
!
! # It would be tempting to do keys %headers_table here but there is not guarantee that
! # they will come back in the right order
!
! foreach my $header (undef, 'from', 'subject', 'bucket') {
$body .= "<th class=\"historyLabel\" scope=\"col\">\n";
$body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=" . ($self->{form_}{sort} eq "$header"?"-":"");
$body .= "$header\">";
!
my $label = '';
if ( defined $self->{language__}{ $headers_table{$header} }) {
***************
*** 2714,2718 ****
if ( $self->{form_}{sort} =~ /^\-?\Q$header\E$/ ) {
! $body .= "<em class=\"historyLabelSort\">$label" . ($self->{form_}{sort} =~ /^-/ ? "-" : "+") . "</em>";
} else {
$body .= "$label";
--- 2719,2723 ----
if ( $self->{form_}{sort} =~ /^\-?\Q$header\E$/ ) {
! $body .= "<em class=\"historyLabelSort\">" . ($self->{form_}{sort} =~ /^-/ ? "<" : ">") . "$label</em>";
} else {
$body .= "$label";
|
|
From: <ssc...@us...> - 2003-04-15 07:31:05
|
Update of /cvsroot/popfile/engine/Proxy
In directory sc8-pr-cvs1:/tmp/cvs-serv23988
Modified Files:
Proxy.pm
Log Message:
fix null-response handling
was causing hangs if servers returned an error and
disconnecting immediately on verify_connected
Index: Proxy.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Proxy/Proxy.pm,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** Proxy.pm 25 Mar 2003 05:24:56 -0000 1.12
--- Proxy.pm 15 Apr 2003 07:31:01 -0000 1.13
***************
*** 464,468 ****
}
! if (!null_resp) {
# An error has occurred reading from the mail server
$self->tee_( $client, "$self->{connection_timeout_error_}$eol" );
--- 464,468 ----
}
! if (!$null_resp) {
# An error has occurred reading from the mail server
$self->tee_( $client, "$self->{connection_timeout_error_}$eol" );
|
|
From: <ssc...@us...> - 2003-04-15 05:32:17
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv13604
Modified Files:
HTML.pm
Log Message:
remove excess boolean
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.127
retrieving revision 1.128
diff -C2 -d -r1.127 -r1.128
*** HTML.pm 15 Apr 2003 05:30:19 -0000 1.127
--- HTML.pm 15 Apr 2003 05:32:13 -0000 1.128
***************
*** 2022,2038 ****
# and there are no ways to sort nothing
&& defined @{$self->{history_keys__}}) {
-
-
-
- if ($sort ne '') {
- @{$self->{history_keys__}} = sort {
- my ($a1,$b1) = ($self->{history__}{$a}{$sort},
- $self->{history__}{$b}{$sort});
- $a1 =~ s/[^A-Z0-9]//ig;
- $b1 =~ s/[^A-Z0-9]//ig;
- return ( $a1 cmp $b1 );
- } @{$self->{history_keys__}};
- }
} else {
# Here's a quick shortcut so that we don't have to iterate
--- 2022,2033 ----
# and there are no ways to sort nothing
&& defined @{$self->{history_keys__}}) {
+ @{$self->{history_keys__}} = sort {
+ my ($a1,$b1) = ($self->{history__}{$a}{$sort},
+ $self->{history__}{$b}{$sort});
+ $a1 =~ s/[^A-Z0-9]//ig;
+ $b1 =~ s/[^A-Z0-9]//ig;
+ return ( $a1 cmp $b1 );
+ } @{$self->{history_keys__}};
} else {
# Here's a quick shortcut so that we don't have to iterate
|
|
From: <ssc...@us...> - 2003-04-15 05:30:23
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv12825
Modified Files:
HTML.pm
Log Message:
add ascending and descending sort to columns, compact column header code some
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.126
retrieving revision 1.127
diff -C2 -d -r1.126 -r1.127
*** HTML.pm 12 Apr 2003 21:16:53 -0000 1.126
--- HTML.pm 15 Apr 2003 05:30:19 -0000 1.127
***************
*** 2010,2013 ****
--- 2010,2020 ----
# field we ignore all punctuation characters so that "John and 'John and John
# all sort next to each other
+
+ # Ascending or Descending? Ascending is noted by /-field/
+
+ my $descending = 0;
+ if ($sort =~ s/^\-//) {
+ $descending = 1;
+ }
if ( $sort ne ''
***************
*** 2015,2025 ****
# and there are no ways to sort nothing
&& defined @{$self->{history_keys__}}) {
! @{$self->{history_keys__}} = sort {
! my ($a1,$b1) = ($self->{history__}{$a}{$sort},
! $self->{history__}{$b}{$sort});
! $a1 =~ s/[^A-Z0-9]//ig;
! $b1 =~ s/[^A-Z0-9]//ig;
! return ( $a1 cmp $b1 );
! } @{$self->{history_keys__}};
} else {
# Here's a quick shortcut so that we don't have to iterate
--- 2022,2038 ----
# and there are no ways to sort nothing
&& defined @{$self->{history_keys__}}) {
!
!
!
! if ($sort ne '') {
! @{$self->{history_keys__}} = sort {
! my ($a1,$b1) = ($self->{history__}{$a}{$sort},
! $self->{history__}{$b}{$sort});
! $a1 =~ s/[^A-Z0-9]//ig;
! $b1 =~ s/[^A-Z0-9]//ig;
! return ( $a1 cmp $b1 );
! } @{$self->{history_keys__}};
! }
!
} else {
# Here's a quick shortcut so that we don't have to iterate
***************
*** 2030,2033 ****
--- 2043,2048 ----
}
}
+
+ @{$self->{history_keys__}} = reverse @{$self->{history_keys__}} if ($descending);
}
***************
*** 2682,2724 ****
$body .= "<table class=\"historyTable\" width=\"100%\" summary=\"$self->{language__}{History_MainTableSummary}\">\n";
# column headers
$body .= "<tr valign=\"bottom\">\n";
! $body .= "<th class=\"historyLabel\" scope=\"col\">\n";
! $body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=\">";
! if ( $self->{form_}{sort} eq '' ) {
! $body .= "<em class=\"historyLabelSort\">ID</em>";
! } else {
! $body .= "ID";
! }
! $body .= "</a>\n</th>\n";
! $body .= "<th class=\"historyLabel\" scope=\"col\">\n";
! $body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=from\">";
!
! if ( $self->{form_}{sort} eq 'from' ) {
! $body .= "<em class=\"historyLabelSort\">$self->{language__}{From}</em>";
! } else {
! $body .= "$self->{language__}{From}";
! }
!
! $body .= "</a>\n</th>\n";
! $body .= "<th class=\"historyLabel\" scope=\"col\">\n";
! $body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=subject\">";
! if ( $self->{form_}{sort} eq 'subject' ) {
! $body .= "<em class=\"historyLabelSort\">$self->{language__}{Subject}</em>";
! } else {
! $body .= "$self->{language__}{Subject}";
}
- $body .= "</a>\n</th>\n";
- $body .= "<th class=\"historyLabel\" scope=\"col\">\n";
- $body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=bucket\">";
- if ( $self->{form_}{sort} eq 'bucket' ) {
- $body .= "<em class=\"historyLabelSort\">$self->{language__}{Classification}</em>";
- } else {
- $body .= "$self->{language__}{Classification}";
- }
-
- $body .= "</a>\n</th>\n";
$body .= "<th class=\"historyLabel\" scope=\"col\">$self->{language__}{History_ShouldBe}</th>\n";
$body .= "<th class=\"historyLabel\" scope=\"col\">$self->{language__}{Remove}</th>\n</tr>\n";
--- 2697,2730 ----
$body .= "<table class=\"historyTable\" width=\"100%\" summary=\"$self->{language__}{History_MainTableSummary}\">\n";
# column headers
+
+ my %headers_table = ( '', 'ID',
+ 'from', 'From',
+ 'subject', 'Subject',
+ 'bucket', 'Classification');
+
+
$body .= "<tr valign=\"bottom\">\n";
!
! foreach my $header (keys %headers_table) {
! $body .= "<th class=\"historyLabel\" scope=\"col\">\n";
! $body .= "<a href=\"/history?session=$self->{session_key__}&filter=$self->{form_}{filter}&setsort=" . ($self->{form_}{sort} eq "$header"?"-":"");
! $body .= "$header\">";
!
! my $label = '';
! if ( defined $self->{language__}{ $headers_table{$header} }) {
! $label = $self->{language__}{ $headers_table{$header} };
! } else {
! $label = $headers_table{$header};
! }
! if ( $self->{form_}{sort} =~ /^\-?\Q$header\E$/ ) {
! $body .= "<em class=\"historyLabelSort\">$label" . ($self->{form_}{sort} =~ /^-/ ? "-" : "+") . "</em>";
! } else {
! $body .= "$label";
! }
! $body .= "</a>\n</th>\n";
}
$body .= "<th class=\"historyLabel\" scope=\"col\">$self->{language__}{History_ShouldBe}</th>\n";
$body .= "<th class=\"historyLabel\" scope=\"col\">$self->{language__}{Remove}</th>\n</tr>\n";
|
|
From: <ssc...@us...> - 2003-04-12 22:53:00
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv10329
Modified Files:
MailParse.pm
Log Message:
fix a warning
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.112
retrieving revision 1.113
diff -C2 -d -r1.112 -r1.113
*** MailParse.pm 12 Apr 2003 21:16:52 -0000 1.112
--- MailParse.pm 12 Apr 2003 22:52:57 -0000 1.113
***************
*** 782,787 ****
# Variables to save header information to while parsing headers
! my $header;
! my $argument;
# Clear the word hash
--- 782,787 ----
# Variables to save header information to while parsing headers
! my $header = '';
! my $argument = '';
# Clear the word hash
|
|
From: <jgr...@us...> - 2003-04-12 21:16:59
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv1534/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
Added infrastructure for QuickMagnets and made all the colorized output use words from the current language, also added false positive and false negative counting; NOTE THAT THESE ARE NOT FULLY WORKING; this check in is so that Sam and I do not diverge too much
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.122
retrieving revision 1.123
diff -C2 -d -r1.122 -r1.123
*** Bayes.pm 12 Apr 2003 07:31:23 -0000 1.122
--- Bayes.pm 12 Apr 2003 21:16:52 -0000 1.123
***************
*** 475,478 ****
--- 475,479 ----
#
# $file The name of the file containing the text to classify
+ # $ui Reference to the UI used when doing colorization
#
# Splits the mail message into valid words, then runs the Bayes algorithm to figure out
***************
*** 482,486 ****
sub classify_file
{
! my ($self, $file) = @_;
my $msg_total = 0;
--- 483,487 ----
sub classify_file
{
! my ($self, $file, $ui) = @_;
my $msg_total = 0;
***************
*** 583,587 ****
my @ranking = sort {$score{$b} <=> $score{$a}} keys %score;
-
my %raw_score;
my $base_score = $score{$ranking[0]};
--- 584,587 ----
***************
*** 601,625 ****
}
! $self->{scores__} = "<b>Scores</b><p>\n<table class=\"top20Buckets\">\n<tr>\n<th scope=\"col\">Bucket</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">Probability</th></tr>\n";
! foreach my $b (@ranking) {
! my $prob = exp($score{$b})/$total;
! my $probstr;
! if ($prob >= 0.1 || $prob == 0.0) {
! $probstr = sprintf("%12.6f", $prob);
! } else {
! $probstr = sprintf("%17.6e", $prob);
! }
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
! }
! $self->{scores__} .= "</table>";
! if ($self->{wordscores__}) {
$self->{scores__} .= "<table class=\"top20Words\">\n<tr><td colspan=\"4\"> </td></tr>\n";
! $self->{scores__} .= "<tr>\n<th scope=\"col\">Word</th><th> </th><th scope=\"col\">Count</th><th> </th>\n";
foreach my $bucket (@buckets) {
--- 601,652 ----
}
! if ($self->{wordscores__} && defined($ui) ) {
! my @qm = @{$self->{parser__}->quickmagnets()};
! my %language = $ui->language();
! my $session_key = $ui->session_key();
! if ( $#qm >= 0 ) {
! $self->{scores__} = "<p><b>$language{QuickMagnets}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Magnet}</th>\n<th>$language{Magnet_Always}</th>\n";
! foreach my $m (@qm) {
! $self->{scores__} .= "<tr><td scope=\"col\">$m</td><td>";
! $self->{scores__} .= "<form action=\"/magnets\">\n";
! $self->{scores__} .= "<input type=\"hidden\" name=\"session\" value=\"$session_key\" />";
! $self->{scores__} .= "<input type=\"hidden\" name=\"type\" id=\"magnetsAddType\" />";
! $self->{scores__} .= "<input type=\"hidden\" name=\"text\" id=\"magnetsAddText\" />";
! $self->{scores__} .= "<select name=\"bucket\" id=\"magnetsAddBucket\">\n<option value=\"\"></option>\n";
! my @buckets = $self->get_buckets();
! foreach my $bucket (@buckets) {
! $self->{scores__} .= "<option value=\"$bucket\">$bucket</option>\n";
! }
!
! $self->{scores__} .= "</select><input type=\"submit\" class=\"submit\" name=\"create\" value=\"$language{Create}\" /></form></td></tr>";
! }
!
! $self->{scores__} .= "</table>";
! }
!
! $self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
! $self->{scores__} .= "<th scope=\"col\">$language{Probability}</th></tr>\n";
!
! foreach my $b (@ranking) {
! my $prob = exp($score{$b})/$total;
! my $probstr;
!
! if ($prob >= 0.1 || $prob == 0.0) {
! $probstr = sprintf("%12.6f", $prob);
! } else {
! $probstr = sprintf("%17.6e", $prob);
! }
!
! $self->{scores__} .= "<tr>\n<td><font color=\"$self->{colors__}{$b}\"><b>$b</b></font></td>\n<td> </td>\n<td>$probstr</td>\n</tr>\n";
! }
!
! $self->{scores__} .= "</table><hr>";
$self->{scores__} .= "<table class=\"top20Words\">\n<tr><td colspan=\"4\"> </td></tr>\n";
! $self->{scores__} .= "<tr>\n<th scope=\"col\">$language{Word}</th><th> </th><th scope=\"col\">$language{Count}</th><th> </th>\n";
foreach my $bucket (@buckets) {
***************
*** 1082,1086 ****
$self->{parser__}->{bayes__} = bless $self;
my $result = $self->{parser__}->parse_stream($file);
! $self->{parser__}->{color__} = 0;
return $result;
--- 1109,1113 ----
$self->{parser__}->{bayes__} = bless $self;
my $result = $self->{parser__}->parse_stream($file);
! $self->{parser__}->{color__} = 0;
return $result;
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.111
retrieving revision 1.112
diff -C2 -d -r1.111 -r1.112
*** MailParse.pm 11 Apr 2003 02:25:45 -0000 1.111
--- MailParse.pm 12 Apr 2003 21:16:52 -0000 1.112
***************
*** 54,58 ****
$self->{color__} = 0;
! # This will store the from, to, cc and subject from the last parse
$self->{from__} = '';
$self->{to__} = '';
--- 54,58 ----
$self->{color__} = 0;
! # This will store the from, to, cc and subject from the last parse
$self->{from__} = '';
$self->{to__} = '';
***************
*** 60,63 ****
--- 60,69 ----
$self->{subject__} = '';
+ # This is used to store the words found in the from, to, and subject
+ # lines for use in creating new magnets, it is a list of pairs mapping
+ # a magnet type to a magnet string, e.g. from => po...@jg...
+
+ $self->{quickmagnets__} = ();
+
# These store the current HTML background color and font color to
# detect "invisible ink" used by spammers
***************
*** 174,177 ****
--- 180,187 ----
$mword = $prefix . ':' . $mword if ( $prefix ne '' );
+ if ( $prefix =~ /(from|to|cc|subject)/i ) {
+ push @{$self->{quickmagnets__}}, ("$prefix: $word");
+ }
+
if ( $self->{color__} ) {
my $color = $self->{bayes__}->get_color($mword);
***************
*** 184,187 ****
--- 194,198 ----
$self->{ut__} .= "<font color=\"$color\">$word<\/font> ";
}
+
} else {
increment_word( $self, $mword );
***************
*** 785,805 ****
$self->{base64__} = '';
!
# Variable to note that the temporary colorized storage is "frozen",
# and what type of freeze it is (allows nesting of reasons to freeze
# colorization)
!
$self->{in_html_tag__} = 0;
!
$self->{html_tag__} = '';
$self->{html_arg__} = '';
! $self->{words__} = {};
! $self->{msg_total__} = 0;
! $self->{from__} = '';
! $self->{to__} = '';
! $self->{cc__} = '';
! $self->{subject__} = '';
! $self->{ut__} = '';
$self->{htmlbackcolor__} = map_color( $self, 'white' );
--- 796,817 ----
$self->{base64__} = '';
!
# Variable to note that the temporary colorized storage is "frozen",
# and what type of freeze it is (allows nesting of reasons to freeze
# colorization)
!
$self->{in_html_tag__} = 0;
!
$self->{html_tag__} = '';
$self->{html_arg__} = '';
! $self->{words__} = {};
! $self->{msg_total__} = 0;
! $self->{from__} = '';
! $self->{to__} = '';
! $self->{cc__} = '';
! $self->{subject__} = '';
! $self->{ut__} = '';
! $self->{quickmagnets__} = ();
$self->{htmlbackcolor__} = map_color( $self, 'white' );
***************
*** 1196,1200 ****
if ( $header =~ /^Content-Type$/i ) {
-
if ( $argument =~ /charset=\"?([^\"]{1,40})\"?/ ) {
update_word( $self, $1, 0, '' , '', 'charset' );
--- 1208,1211 ----
***************
*** 1205,1210 ****
$self->{content_type__} = $1;
}
!
! if ( $argument =~ /multipart\//i ) {
my $boundary = $argument;
--- 1216,1221 ----
$self->{content_type__} = $1;
}
!
! if ( $argument =~ /multipart\//i ) {
my $boundary = $argument;
***************
*** 1293,1297 ****
--- 1304,1313 ----
}
+ sub quickmagnets
+ {
+ my ( $self ) = @_;
+ return $self->{quickmagnets__};
+ }
1;
|
|
From: <jgr...@us...> - 2003-04-12 21:16:58
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv1534/UI
Modified Files:
HTML.pm
Log Message:
Added infrastructure for QuickMagnets and made all the colorized output use words from the current language, also added false positive and false negative counting; NOTE THAT THESE ARE NOT FULLY WORKING; this check in is so that Sam and I do not diverge too much
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.125
retrieving revision 1.126
diff -C2 -d -r1.125 -r1.126
*** HTML.pm 12 Apr 2003 02:20:17 -0000 1.125
--- HTML.pm 12 Apr 2003 21:16:53 -0000 1.126
***************
*** 1424,1428 ****
# bar_chart_100 - Output an HTML bar chart
#
! # %values A hash of bucket names with values
#
# ---------------------------------------------------------------------------------------------
--- 1424,1428 ----
# bar_chart_100 - Output an HTML bar chart
#
! # %values A hash of bucket names with values in series 0, 1, 2, ...
#
# ---------------------------------------------------------------------------------------------
***************
*** 1433,1461 ****
my $total_count = 0;
my @xaxis = sort keys %values;
for my $bucket (@xaxis) {
! $total_count += $values{$bucket};
}
for my $bucket (@xaxis) {
! my $count = pretty_number( $self, $values{$bucket} );
! my $percent;
! if ( $total_count == 0 ) {
! $percent = "0%";
! } else {
! $percent = int( $values{$bucket} * 10000 / $total_count ) / 100;
! $percent .= "%";
! }
! $body .= "<tr>\n<td align=\"left\"><font color=\"". $self->{classifier__}->get_bucket_color($bucket) . "\">$bucket</font></td>\n";
! $body .= "<td> </td>\n<td align=\"right\">$count ($percent)</td>\n</tr>\n";
}
! $body .= "<tr>\n<td colspan=\"3\"> </td>\n</tr>\n<tr>\n<td colspan=\"3\">\n";
if ( $total_count != 0 ) {
$body .= "<table class=\"barChart\" width=\"100%\" summary=\"$self->{language__}{Bucket_BarChartSummary}\">\n<tr>\n";
foreach my $bucket (@xaxis) {
! my $percent = int( $values{$bucket} * 10000 / $total_count ) / 100;
if ( $percent != 0 ) {
$body .= "<td bgcolor=\"" . $self->{classifier__}->get_bucket_color($bucket) . "\" title=\"$bucket ($percent%)\" width=\"";
--- 1433,1472 ----
my $total_count = 0;
my @xaxis = sort keys %values;
+ my @series = sort keys %{$values{$xaxis[0]}};
for my $bucket (@xaxis) {
! $total_count += $values{$bucket}{0};
}
for my $bucket (@xaxis) {
! $body .= "<tr>\n<td align=\"left\"><font color=\"". $self->{classifier__}->get_bucket_color($bucket) . "\">$bucket</font></td>\n<td> </td>";
! for my $s (@series) {
! my $value = $values{$bucket}{$s} || 0;
! my $count = $self->pretty_number( $value );
! my $percent = '';
!
! if ( $s == 0 ) {
! if ( $total_count == 0 ) {
! $percent = " (0%)";
! } else {
! $percent = " ( " . int( $value * 10000 / $total_count ) / 100;
! $percent .= "%)";
! }
! }
!
! $body .= "\n<td align=\"right\">$count$percent</td>";
! }
! $body .= "\n</tr>\n";
}
! my $colspan = 3;
!
! $body .= "<tr>\n<td colspan=\"$colspan\"> </td>\n</tr>\n<tr>\n<td colspan=\"$colspan\">\n";
if ( $total_count != 0 ) {
$body .= "<table class=\"barChart\" width=\"100%\" summary=\"$self->{language__}{Bucket_BarChartSummary}\">\n<tr>\n";
foreach my $bucket (@xaxis) {
! my $percent = int( $values{$bucket}{0} * 10000 / $total_count ) / 100;
if ( $percent != 0 ) {
$body .= "<td bgcolor=\"" . $self->{classifier__}->get_bucket_color($bucket) . "\" title=\"$bucket ($percent%)\" width=\"";
***************
*** 1470,1474 ****
if ( $total_count != 0 ) {
! $body .= "<tr>\n<td colspan=\"3\" align=\"right\"><span class=\"graphFont\">100%</span></td>\n</tr>\n";
}
--- 1481,1485 ----
if ( $total_count != 0 ) {
! $body .= "<tr>\n<td colspan=\"$colspan\" align=\"right\"><span class=\"graphFont\">100%</span></td>\n</tr>\n";
}
***************
*** 1509,1513 ****
if ( ( defined($self->{form_}{color}) ) && ( defined($self->{form_}{bucket}) ) ) {
! open COLOR, '>' . $self->{classifier__}->config_( 'corpus' ) . "/$self->{form_}{bucket}/color";
print COLOR "$self->{form_}{color}\n";
close COLOR;
--- 1520,1524 ----
if ( ( defined($self->{form_}{color}) ) && ( defined($self->{form_}{bucket}) ) ) {
! open COLOR, '>' . $self->module_config_( 'bayes', 'corpus' ) . "/$self->{form_}{bucket}/color";
print COLOR "$self->{form_}{color}\n";
close COLOR;
***************
*** 1734,1742 ****
$body .= "<table summary=\"\">\n<tr>\n";
$body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"left\">$self->{language__}{Bucket}</th>\n<th> </th>\n";
! $body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"right\">$self->{language__}{Bucket_ClassificationCount}</th>\n</tr>\n";
my %bar_values;
for my $bucket (@buckets) {
! $bar_values{$bucket} = $self->{classifier__}->get_bucket_parameter( $bucket, 'count' );
}
--- 1745,1758 ----
$body .= "<table summary=\"\">\n<tr>\n";
$body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"left\">$self->{language__}{Bucket}</th>\n<th> </th>\n";
! $body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"right\">$self->{language__}{Bucket_ClassificationCount}</th>\n";
! $body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"right\">$self->{language__}{Bucket_ClassificationFP}</th>\n";
! $body .= "<th class=\"bucketsLabel\" scope=\"col\" align=\"right\">$self->{language__}{Bucket_ClassificationFN}</th>\n</tr>\n";
!
my %bar_values;
for my $bucket (@buckets) {
! $bar_values{$bucket}{0} = $self->{classifier__}->get_bucket_parameter( $bucket, 'count' );
! $bar_values{$bucket}{1} = $self->{classifier__}->get_bucket_parameter( $bucket, 'fpcount' );
! $bar_values{$bucket}{2} = $self->{classifier__}->get_bucket_parameter( $bucket, 'fncount' );
}
***************
*** 1750,1754 ****
for my $bucket (@buckets) {
! $bar_values{$bucket} = $self->{classifier__}->get_bucket_word_count($bucket);
}
--- 1766,1772 ----
for my $bucket (@buckets) {
! $bar_values{$bucket}{0} = $self->{classifier__}->get_bucket_word_count($bucket);
! delete $bar_values{$bucket}{1};
! delete $bar_values{$bucket}{2};
}
***************
*** 2349,2356 ****
$self->log_( "Reclassifying $mail_file from $bucket to $newbucket" );
! $self->{classifier__}->set_bucket_parameter( $newbucket, 'count',
! $self->{classifier__}->get_bucket_parameter( $newbucket, 'count' ) + 1 );
! $self->{classifier__}->set_bucket_parameter( $bucket, 'count',
! $self->{classifier__}->get_bucket_parameter( $bucket, 'count' ) - 1 );
# Update the class file
--- 2367,2381 ----
$self->log_( "Reclassifying $mail_file from $bucket to $newbucket" );
! if ( $bucket ne $newbucket ) {
! $self->{classifier__}->set_bucket_parameter( $newbucket, 'count',
! $self->{classifier__}->get_bucket_parameter( $newbucket, 'count' ) + 1 );
! $self->{classifier__}->set_bucket_parameter( $bucket, 'count',
! $self->{classifier__}->get_bucket_parameter( $bucket, 'count' ) - 1 );
!
! $self->{classifier__}->set_bucket_parameter( $newbucket, 'fncount',
! $self->{classifier__}->get_bucket_parameter( $newbucket, 'fncount' ) + 1 );
! $self->{classifier__}->set_bucket_parameter( $bucket, 'fpcount',
! $self->{classifier__}->get_bucket_parameter( $bucket, 'fpcount' ) + 1 );
! }
# Update the class file
***************
*** 2407,2410 ****
--- 2432,2440 ----
$self->{classifier__}->set_bucket_parameter( $usedtobe, 'count',
$self->{classifier__}->get_bucket_parameter( $usedtobe, 'count' ) + 1 );
+
+ $self->{classifier__}->set_bucket_parameter( $bucket, 'fncount',
+ $self->{classifier__}->get_bucket_parameter( $bucket, 'fncount' ) - 1 );
+ $self->{classifier__}->set_bucket_parameter( $usedtobe, 'fpcount',
+ $self->{classifier__}->get_bucket_parameter( $usedtobe, 'fpcount' ) - 1 );
}
***************
*** 2819,2823 ****
$body .= "<table><tr><td class=\"top20\" valign=\"top\">\n";
! $self->{classifier__}->classify_file($self->global_config_( 'msgdir' ) . "$self->{form_}{view}");
$body .= $self->{classifier__}->{scores__};
$body .= "</tr></table></td>\n</tr>\n";
--- 2849,2853 ----
$body .= "<table><tr><td class=\"top20\" valign=\"top\">\n";
! $self->{classifier__}->classify_file($self->global_config_( 'msgdir' ) . "$self->{form_}{view}", $self);
$body .= $self->{classifier__}->{scores__};
$body .= "</tr></table></td>\n</tr>\n";
***************
*** 3189,3192 ****
--- 3219,3236 ----
return $self->{classifier__};
+ }
+
+ sub language
+ {
+ my ( $self ) = @_;
+
+ return %{$self->{language__}};
+ }
+
+ sub session_key
+ {
+ my ( $self ) = @_;
+
+ return $self->{session_key__};
}
|
|
From: <jgr...@us...> - 2003-04-12 21:16:58
|
Update of /cvsroot/popfile/engine/languages
In directory sc8-pr-cvs1:/tmp/cvs-serv1534/languages
Modified Files:
English.msg
Log Message:
Added infrastructure for QuickMagnets and made all the colorized output use words from the current language, also added false positive and false negative counting; NOTE THAT THESE ARE NOT FULLY WORKING; this check in is so that Sam and I do not diverge too much
Index: English.msg
===================================================================
RCS file: /cvsroot/popfile/engine/languages/English.msg,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** English.msg 7 Apr 2003 17:55:57 -0000 1.24
--- English.msg 12 Apr 2003 21:16:54 -0000 1.25
***************
*** 22,25 ****
--- 22,28 ----
Classification Classification
Reclassify Reclassify
+ Probability Probability
+ Scores Scores
+ QuickMagnets QuickMagnets
Undo Undo
Close Close
***************
*** 41,44 ****
--- 44,49 ----
Score Score
Lookup Lookup
+ Word Word
+ Count Count
# The header and footer that appear on every UI page
***************
*** 203,206 ****
--- 208,213 ----
Bucket_Accuracy Accuracy
Bucket_ClassificationCount Classification Count
+ Bucket_ClassificationFP False Positives
+ Bucket_ClassificationFN False Negatives
Bucket_ResetStatistics Reset Statistics
Bucket_LastReset Last Reset
|
|
From: <ssc...@us...> - 2003-04-12 07:31:27
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv25861
Modified Files:
Bayes.pm
Log Message:
remove log caching in matrix. Discussed in patch:
[ 704112 ] Improve performance of traintest
Thanks to biljir for initial patch contribution
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.121
retrieving revision 1.122
diff -C2 -d -r1.121 -r1.122
*** Bayes.pm 8 Apr 2003 04:20:10 -0000 1.121
--- Bayes.pm 12 Apr 2003 07:31:23 -0000 1.122
***************
*** 137,140 ****
--- 137,142 ----
}
+ $self->{unclassified__} = log($self->{unclassified__});
+
$self->load_word_matrix_();
***************
*** 224,234 ****
if ( defined($self->{matrix__}{$bucket}[$i]) ) {
- return $1 if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E L([\-\.\d]+)\|/ ) != 0 );
- }
-
- if ( defined($self->{matrix__}{$bucket}[$i]) ) {
if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
! my $newvalue = log($1 / $self->{total__}{$bucket});
! set_value_( $self, $bucket, $word, "L$newvalue" );
return $newvalue;
}
--- 226,231 ----
if ( defined($self->{matrix__}{$bucket}[$i]) ) {
if ( ( $self->{matrix__}{$bucket}[$i] =~ /\|\Q$word\E (\d+)\|/ ) != 0 ) {
! my $newvalue = log($1/$self->{total__}{$bucket});
return $newvalue;
}
***************
*** 244,251 ****
if ( $word ne '' ) {
$word =~ /^(.)/;
! my $i = ord($1);
$self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
! $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E (L?[\-\.\d]+)\|/\|$word $value\|/ ) == 0 );
}
}
--- 241,248 ----
if ( $word ne '' ) {
$word =~ /^(.)/;
! my $i = ord($1);
$self->{matrix__}{$bucket}[$i] = '' if ( !defined($self->{matrix__}{$bucket}[$i]) );
! $self->{matrix__}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix__}{$bucket}[$i] =~ s/\|\Q$word\E [\-\.\d]+\|/\|$word $value\|/ ) == 0 );
}
}
***************
*** 263,271 ****
if ( $self->{full_total__} > 0 ) {
! $self->{not_likely__} = log( 1 / ( 10 * $self->{full_total__} ) );
foreach my $bucket (keys %{$self->{total__}}) {
if ( $self->{total__}{$bucket} != 0 ) {
! $self->{bucket_start__}{$bucket} = log($self->{total__}{$bucket} / $self->{full_total__});
} else {
$self->{bucket_start__}{$bucket} = 0;
--- 260,271 ----
if ( $self->{full_total__} > 0 ) {
!
! # ln(10) =~ 2.30258509299404568401799145468436
!
! $self->{not_likely__} = -log( $self->{full_total__} ) - 2.30258509299404568401799145468436;
foreach my $bucket (keys %{$self->{total__}}) {
if ( $self->{total__}{$bucket} != 0 ) {
! $self->{bucket_start__}{$bucket} = log( $self->{total__}{$bucket} / $self->{full_total__} );
} else {
$self->{bucket_start__}{$bucket} = 0;
***************
*** 595,599 ****
$raw_score{$b} = $score{$b};
$score{$b} -= $base_score;
! $total += exp($score{$b}) if ($score{$b} > 54 * log(0.5));
}
--- 595,602 ----
$raw_score{$b} = $score{$b};
$score{$b} -= $base_score;
!
! # ln(2) =~ 0.693147180559945309417232121458177
!
! $total += exp($score{$b}) if ($score{$b} > ( -54 * 0.693147180559945309417232121458177 ) );
}
***************
*** 673,678 ****
# If no bucket has a probability better than 0.5, call the message "unclassified".
my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > log($self->{unclassified__} * $total) ) ) {
$class = $ranking[0];
}
--- 676,681 ----
# If no bucket has a probability better than 0.5, call the message "unclassified".
my $class = 'unclassified';
!
! if ( ( $total != 0 ) && ( $score{$ranking[0]} > $self->{unclassified__} + log($total) ) ) {
$class = $ranking[0];
}
|
|
From: <ssc...@us...> - 2003-04-12 07:20:54
|
Update of /cvsroot/popfile/engine
In directory sc8-pr-cvs1:/tmp/cvs-serv23243
Modified Files:
traintest.pl
Log Message:
add corpus output option: -dump 1
will output the accumulated corpus to the "archive_corpus" subdirectory
Index: traintest.pl
===================================================================
RCS file: /cvsroot/popfile/engine/traintest.pl,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** traintest.pl 16 Mar 2003 01:32:31 -0000 1.3
--- traintest.pl 12 Apr 2003 07:20:50 -0000 1.4
***************
*** 21,24 ****
--- 21,26 ----
my $DEFAULT_CLASSIFIER = "bayes";
my $DEFAULT_ARCHIVE = "archive";
+ my $DEFAULT_DUMP = "0";
+ my $DEFAULT_CORPUS = "archive_corpus";
$| = 1;
***************
*** 70,73 ****
--- 72,77 ----
$config->parameter("classifier",$DEFAULT_CLASSIFIER);
$config->parameter("archive_dir",$DEFAULT_ARCHIVE);
+ $config->parameter("dump",$DEFAULT_DUMP);
+ $config->parameter("corpus_out",$DEFAULT_CORPUS);
}
***************
*** 132,147 ****
my $wordvalue = $wordtab{$bucket."|".$word};
$wordtab{$bucket."|".$word} += $b->{parser__}->{words__}{$word};
! # my $wordvalue = $b->get_value($bucket, $word);
! # $b->set_value($bucket,$word, $wordvalue + $b->{parser}->{words}{$word} );
! # $b->set_value($bucket,$word, $wordtab{$bucket."|".$word});
$b->{total__}{$bucket} += $b->{parser__}->{words__}{$word};
$b->{unique__}{$bucket} += 1 if ($wordvalue == 0);
}
$b->{full_total__} += $b->{parser__}{msg_total__};
- foreach my $word (keys %wordtab) {
- if ( $word =~ /^\Q$bucket\E\|(.*)$/ ) {
- $b->set_value_($bucket,$1, $wordtab{$word});
- }
- }
$b->update_constants_();
}
--- 136,144 ----
my $wordvalue = $wordtab{$bucket."|".$word};
$wordtab{$bucket."|".$word} += $b->{parser__}->{words__}{$word};
! $b->set_value_($bucket,$word, $wordtab{$bucket."|".$word});
$b->{total__}{$bucket} += $b->{parser__}->{words__}{$word};
$b->{unique__}{$bucket} += 1 if ($wordvalue == 0);
}
$b->{full_total__} += $b->{parser__}{msg_total__};
$b->update_constants_();
}
***************
*** 164,167 ****
--- 161,192 ----
}
+ sub dump_corpus
+ {
+ my ($self) = @_;
+
+ my $dir = $self->{configuration__}->parameter('corpus_out');
+ mkdir($dir);
+
+ foreach my $abucket ( keys %{$self->{total__}} ) {
+
+ print "saving $abucket corpus.\n";
+
+ my $subdir = $dir;
+ $subdir .= "/$abucket";
+
+ mkdir($subdir);
+
+ open CORPUS, ">$dir/$abucket/table";
+ print CORPUS "__CORPUS__ __VERSION__ 1\n";
+ for my $ord ( @{$self->get_bucket_word_list($abucket)} ) {
+ if ( defined($ord) ) {
+ while ($ord =~ s/\|([^ ]+) (\d+)\|//) {
+ print CORPUS "$1 $2\n";
+ }
+ }
+ }
+ }
+ }
+
***************
*** 255,264 ****
initialize( $c );
! $c->load_configuration();
$c->parse_command_line();
# $b->{unclassified} = ($c->parameter('unclassified_probability') || 0.0001);
! $b->{unclassified__} = ($c->parameter("bayes_unclassified_probability") || 0.5);
# test with or without stop-words
--- 280,289 ----
initialize( $c );
! $c->load_configuration();
$c->parse_command_line();
# $b->{unclassified} = ($c->parameter('unclassified_probability') || 0.0001);
! $b->{unclassified__} = log($c->parameter("bayes_unclassified_probability") || 0.5);
# test with or without stop-words
***************
*** 269,273 ****
! my $archive = $c->parameter("ui_archive_dir");
--- 294,298 ----
! my $archive = $c->parameter("html_archive_dir");
***************
*** 398,401 ****
--- 423,432 ----
my $end_time = time;
+
+ if ($c->parameter("dump")) {
+ dump_corpus($b);
+
+ }
+
my $total_messages = $#sorted_messages + 1;
***************
*** 417,420 ****
--- 448,453 ----
print " -toe: Train Only Errors, defaults to $DEFAULT_TOE\n";
print " -stopwords: Use stop-words, defaults to $DEFAULT_STOP\n";
+ print " -dump: Outputs accumulated corpus, defaults to $DEFAULT_DUMP\n";
+ print " -corpus_out: Location to save output corpus, defaults to $DEFAULT_CORPUS\n";
}
|
|
From: <ssc...@us...> - 2003-04-12 02:20:20
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv15212
Modified Files:
HTML.pm
Log Message:
fix bucket color change
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.124
retrieving revision 1.125
diff -C2 -d -r1.124 -r1.125
*** HTML.pm 10 Apr 2003 23:19:53 -0000 1.124
--- HTML.pm 12 Apr 2003 02:20:17 -0000 1.125
***************
*** 1509,1513 ****
if ( ( defined($self->{form_}{color}) ) && ( defined($self->{form_}{bucket}) ) ) {
! open COLOR, '>' . $self->config_( 'corpus' ) . "/$self->{form_}{bucket}/color";
print COLOR "$self->{form_}{color}\n";
close COLOR;
--- 1509,1513 ----
if ( ( defined($self->{form_}{color}) ) && ( defined($self->{form_}{bucket}) ) ) {
! open COLOR, '>' . $self->{classifier__}->config_( 'corpus' ) . "/$self->{form_}{bucket}/color";
print COLOR "$self->{form_}{color}\n";
close COLOR;
|
|
From: <ssc...@us...> - 2003-04-11 02:25:48
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv26590
Modified Files:
MailParse.pm
Log Message:
syntax error. Whoops.
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.110
retrieving revision 1.111
diff -C2 -d -r1.110 -r1.111
*** MailParse.pm 11 Apr 2003 01:20:12 -0000 1.110
--- MailParse.pm 11 Apr 2003 02:25:45 -0000 1.111
***************
*** 1109,1113 ****
print "Header ($header) ($argument)\n" if ($self->{debug});
! if ($self->{color__} {
# Remove over-reading
$self->{ut__} = '';
--- 1109,1113 ----
print "Header ($header) ($argument)\n" if ($self->{debug});
! if ($self->{color__}) {
# Remove over-reading
$self->{ut__} = '';
|
|
From: <ssc...@us...> - 2003-04-11 01:20:16
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv27741
Modified Files:
MailParse.pm
Log Message:
reduce unnecessary colorization work in headers
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.109
retrieving revision 1.110
diff -C2 -d -r1.109 -r1.110
*** MailParse.pm 10 Apr 2003 22:22:47 -0000 1.109
--- MailParse.pm 11 Apr 2003 01:20:12 -0000 1.110
***************
*** 1108,1117 ****
print "Header ($header) ($argument)\n" if ($self->{debug});
!
! # Remove over-reading
! $self->{ut__} = '';
!
! # Qeueue just this header for colorization
! $self->{ut__} = splitline("$header: $argument\015\012", $encoding);
# After a discussion with Tim Peters and some looking at emails
--- 1108,1119 ----
print "Header ($header) ($argument)\n" if ($self->{debug});
!
! if ($self->{color__} {
! # Remove over-reading
! $self->{ut__} = '';
!
! # Qeueue just this header for colorization
! $self->{ut__} = splitline("$header: $argument\015\012", $encoding);
! }
# After a discussion with Tim Peters and some looking at emails
***************
*** 1280,1284 ****
return $line;
! }
# GETTERS/SETTERS
--- 1282,1286 ----
return $line;
! }
# GETTERS/SETTERS
|
|
From: <ssc...@us...> - 2003-04-11 01:19:36
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv27291
Modified Files:
Tag: v0/18/1
MailParse.pm
Log Message:
reduce unnecessary colorization work in headers
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.91.2.3
retrieving revision 1.91.2.4
diff -C2 -d -r1.91.2.3 -r1.91.2.4
*** MailParse.pm 11 Apr 2003 00:33:57 -0000 1.91.2.3
--- MailParse.pm 11 Apr 2003 01:19:32 -0000 1.91.2.4
***************
*** 975,983 ****
print "Header ($header) ($argument)\n" if ($self->{debug});
! # Remove over-reading
! $self->{ut} = '';
!
! # Qeueue just this header for colorization
! $self->{ut} = splitline("$header: $argument\015\012", $encoding);
# Check the encoding type in all RFC 2047 encoded headers
--- 975,985 ----
print "Header ($header) ($argument)\n" if ($self->{debug});
! if ($self->{color}) {
! # Remove over-reading
! $self->{ut} = '';
!
! # Qeueue just this header for colorization
! $self->{ut} = splitline("$header: $argument\015\012", $encoding);
! }
# Check the encoding type in all RFC 2047 encoded headers
|
Update of /cvsroot/popfile/engine/tests
In directory sc8-pr-cvs1:/tmp/cvs-serv30318
Modified Files:
Tag: v0/18/1
TestMailParse.tst TestMailParse001.cam TestMailParse002.cam
TestMailParse003.cam TestMailParse003.col TestMailParse003.msg
TestMailParse004.cam TestMailParse004.col TestMailParse005.cam
TestMailParse005.col TestMailParse006.cam TestMailParse007.cam
TestMailParse007.col TestMailParse008.cam TestMailParse008.col
TestMailParse009.cam TestMailParse009.col TestMailParse010.cam
TestMailParse010.col TestMailParse011.cam TestMailParse011.col
TestMailParse012.cam TestMailParse012.col TestMailParse013.cam
TestMailParse013.col TestMailParse014.cam TestMailParse015.cam
TestMailParse015.col TestMailParse016.cam TestMailParse017.cam
TestMailParse017.col TestMailParse018.cam TestMailParse018.col
TestMailParse019.col TestMailParse020.cam TestMailParse020.col
TestMailParse021.cam TestMailParse021.col
Log Message:
Update tests for multi-line parser
Index: TestMailParse.tst
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse.tst,v
retrieving revision 1.5.2.1
retrieving revision 1.5.2.2
diff -C2 -d -r1.5.2.1 -r1.5.2.2
*** TestMailParse.tst 11 Mar 2003 08:10:40 -0000 1.5.2.1
--- TestMailParse.tst 11 Apr 2003 00:38:04 -0000 1.5.2.2
***************
*** 142,148 ****
# Check that multi-line To: and CC: headers get handled properly
$cl->parse_stream( 'tests/TestMailParse021.msg' );
! test_assert_equal( $cl->{to}, 'ds...@ct..., ds...@do..., ds...@di..., ds...@di..., ds...@cr..., <ds...@cy...>, <ds...@do...>, <ds...@db...>, <ds...@cs...>, <ds...@cr...>, <ds...@dr...>, <ds...@cv...>, <ds...@dm...>, <ds...@da...>, <ds...@da...>' );
! test_assert_equal( $cl->{cc}, 'ds...@dm..., ds...@da..., ds...@cr..., ds...@da..., <ds...@do...>, <ds...@dn...>, <ds...@cy...>, <ds...@cs...>, <ds...@di...>, <ds...@cv...>, <ds...@dr...>, <ds...@cr...>, <ds...@dc...>, <ds...@da...>' );
#Test colorized output
--- 142,151 ----
# Check that multi-line To: and CC: headers get handled properly
$cl->parse_stream( 'tests/TestMailParse021.msg' );
! test_assert_equal( $cl->{to}, "dsmith\@ctaz.com, dsmith\@dol.net, dsmith\@dirtur.com, dsmith\@dialpoint.net, dsmith\@crosscountybank.com, \15\12\t<dsmith\@cybersurf.net>, <dsmith\@dotnet.com>, <dsmith\@db.com>, <dsmith\@cs.com>\15\12\t, <dsmith\@crossville.com>, \15\12\t<dsmith\@dreamscape.com>, <dsmith\@cvnc.net>, <dsmith\@dmrtc.net>, <dsmith\@datarecall.net>, \15\12\t<dsmith\@dasia.net>" );
! test_assert_equal( $cl->{cc}, "dsmith\@dmi.net, dsmith\@datamine.net, dsmith\@crusader.com, dsmith\@datasync.com, \15\12\t<dsmith\@doorpi.net>, <dsmith\@dnet.net>, <dsmith\@cybcon.com>, <dsmith\@csonline.net>, \15\12\t<dsmith\@directlink.net>, <dsmith\@cvip.net>, <dsmith\@dragonbbs.com>, <dsmith\@crosslinkinc.com>, \15\12\t<dsmith\@dccnet.com>, <dsmith\@dakotacom.net>" );
+ open TEST, ">tests/temp.out1";
+ print TEST "$cl->{to}\15\12\15\12\15\12";
+ print TEST "dsmith\@ctaz.com, dsmith\@dol.net, dsmith\@dirtur.com, dsmith\@dialpoint.net, dsmith\@crosscountybank.com, \15\12\t<dsmith\@cybersurf.net>, <dsmith\@dotnet.com>, <dsmith\@db.com>, <dsmith\@cs.com>\15\12\t, <dsmith\@crossville.com>, \15\12\t<dsmith\@dreamscape.com>, <dsmith\@cvnc.net>, <dsmith\@dmrtc.net>, <dsmith\@datarecall.net>, \15\12\t<dsmith\@dasia.net>";
#Test colorized output
***************
*** 185,189 ****
close COL;
close OUTPUT;
! # rename( 'tests/temp.out', $output_file );
unlink( 'tests/temp.out' );
--- 188,192 ----
close COL;
close OUTPUT;
! # rename( 'tests/temp.out', $output_file );
unlink( 'tests/temp.out' );
Index: TestMailParse001.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse001.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse001.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse001.cam 11 Apr 2003 00:38:05 -0000 1.2.2.1
***************
*** 1,7 ****
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Testing S P A C E D out words spaced out in a document.
.
--- 1,7 ----
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Testing S P A C E D out words spaced out in a document.
.
Index: TestMailParse002.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse002.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse002.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse002.cam 11 Apr 2003 00:38:05 -0000 1.2.2.1
***************
*** 1,6 ****
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! who wants to be a millionaire milli<!---->onaire mi<!-- testing -->llionaire millionair<!-- -->e
--- 1,6 ----
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! who wants to be a millionaire milli<!---->onaire mi<!-- testing -->llionaire millionair<!-- -->e
Index: TestMailParse003.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse003.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse003.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse003.cam 11 Apr 2003 00:38:05 -0000 1.2.2.1
***************
*** 1,34 ****
Subject: [spam] from: Leandro
! To: jI7...@ao...
! Message-ID: <200...@8S...>
! From: "serafina venter"<j23...@ea...>
! Date: Sun, 12 Jan 2003 19:49:36 -0400
! X-Priority: 3 (Normal)
! Importance: Normal
! X-Accept-Language: en
! Errors-To: <j23...@ea...>
! MIME-Version: 1.0
! Content-Type: multipart/alternative;
! boundary="=gs6mlah1knva3tk5nbs4422k3570q7m3k=="
! Content-Transfer-Encoding: 7bit
! Return-Path: j23...@ea...
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (UTC)
! FILETIME=[A257A580:01C2BA9D]
! --=gs6mlah1knva3tk5nbs4422k3570q7m3k==
! Content-Transfer-Encoding: base64
! Content-Type: text/html; charset="US-ASCII"
! PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3
! OyYjOTc7PCFLND50PCE0YTQ1PmMmIzEwNDs8IVBKMHV1PiAmIzY4OzwhT1UxMGRRPm88IWgzMj5nPCFOWDc4PnM8IUY0NzZ0PiAmIzExNTsmIzEwODs8IXkw
! eDY+dSYjMTE0OzwhV1ZRPnAmIzMyOzwhMW0+eTwhS1NrUD5vPCFvMzVBZT51JiMxMTA7JiMxMDM7PCE0N2ViVTM+ICYjMTAzOyYjMTA1OyYjMTE0OyYjMTA4
! OyYjMTE1OyYjMzI7PCF5MjU+cCYjMTE3OzwhOFljPnMmIzExNTsmIzEyMTs8ITVSaTQ+JzwhcEdTNj5zJiMzMjsmIzk3OzwhQWgxPnMmIzMyOyYjMTE2OyYj
! MTA0OzwhMXJKM1JIPmU8IW84V1h1PnkmIzMyOzwhMzU+czwhMFE3ND5jJiMxMTQ7PCFSZnA+ZTwhUGw+YTwhSzQ+bTwhNGE0NT4gJiMxMDI7PCFQSjB1dT5v
! JiMxMTQ7PCFPVTEwZFE+IDwhaDMyPm08IU5YNzg+bzwhRjQ3NnQ+ciYjMTAxOyYjMzM7PC9mb250PjwvYT48QlI+DQo8QlIgck0wc1JhUHE+PGEgaHJlZj0i
! aHR0cDovL3d3dyUyRSU3MCU2MSU3MyU3MyUzNGZyZWUlMkUlNkUlNjV0L3BiMy8iIDFySjNSSEJvOFcgdW5TVlQ3PjxGT05UIFNJWkU9ND48Qj48IXkweDY+
! QyYjMTA4OzwhV1ZRPmkmIzk5OzwhMW0+azwhS1NrUD4gPCFvMzVBZT5IJiMxMDE7JiMxMTQ7PCE0N2ViVTM+ZTwvZm9udD48L2E+PEJSPjxCUj48QlI+PEJS
! PjxCUj48QlI+PEJSPiYjMTM7JiMxMDsmIzY5OyYjMTA5OyYjOTc7JiMxMDU7PCF5MjU+bCYjMzI7PCE4WWM+QiYjOTc7JiMxMDA7PCE1Umk0Pj88QlIgUlIg
! Mk1PZHZjTT4NCm5vIG1vcmUgPGEgaHJlZj0iaHR0cDovL3JlbW92ZSUyRSU2RGUlNzMlNzNhJTY3JTY1bSU2NW4lNkYlNzcuJTZFZXQvIiBSZnBOUD5DbGlj
! ayBIZXJlPC9hPjxCUj4NCjxCUj48L2h0bWw+DQoNCmFQcTgyTU9kICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjTUo=
! --=gs6mlah1knva3tk5nbs4422k3570q7m3k==--
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
--- 1,34 ----
Subject: [spam] from: Leandro
! To: jI7...@ao...
! Message-ID: <200...@8S...>
! From: "serafina venter"<j23...@ea...>
! Date: Sun, 12 Jan 2003 19:49:36 -0400
! X-Priority: 3 (Normal)
! Importance: Normal
! X-Accept-Language: en
! Errors-To: <j23...@ea...>
! MIME-Version: 1.0
! Content-Type: multipart/alternative;
! boundary="=gs6mlah1knva3tk5nbs4422k3570q7m3k=="
! Content-Transfer-Encoding: 7bit
! Return-Path: j23...@ea...
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (UTC) FILETIME=[A257A580:01C2BA9D]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
+ --=gs6mlah1knva3tk5nbs4422k3570q7m3k==
+ Content-Transfer-Encoding: base64
+ Content-Type: text/html; charset="US-ASCII"
+
+ PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3
+ OyYjOTc7PCFLND50PCE0YTQ1PmMmIzEwNDs8IVBKMHV1PiAmIzY4OzwhT1UxMGRRPm88IWgzMj5nPCFOWDc4PnM8IUY0NzZ0PiAmIzExNTsmIzEwODs8IXkw
+ eDY+dSYjMTE0OzwhV1ZRPnAmIzMyOzwhMW0+eTwhS1NrUD5vPCFvMzVBZT51JiMxMTA7JiMxMDM7PCE0N2ViVTM+ICYjMTAzOyYjMTA1OyYjMTE0OyYjMTA4
+ OyYjMTE1OyYjMzI7PCF5MjU+cCYjMTE3OzwhOFljPnMmIzExNTsmIzEyMTs8ITVSaTQ+JzwhcEdTNj5zJiMzMjsmIzk3OzwhQWgxPnMmIzMyOyYjMTE2OyYj
+ MTA0OzwhMXJKM1JIPmU8IW84V1h1PnkmIzMyOzwhMzU+czwhMFE3ND5jJiMxMTQ7PCFSZnA+ZTwhUGw+YTwhSzQ+bTwhNGE0NT4gJiMxMDI7PCFQSjB1dT5v
+ JiMxMTQ7PCFPVTEwZFE+IDwhaDMyPm08IU5YNzg+bzwhRjQ3NnQ+ciYjMTAxOyYjMzM7PC9mb250PjwvYT48QlI+DQo8QlIgck0wc1JhUHE+PGEgaHJlZj0i
+ aHR0cDovL3d3dyUyRSU3MCU2MSU3MyU3MyUzNGZyZWUlMkUlNkUlNjV0L3BiMy8iIDFySjNSSEJvOFcgdW5TVlQ3PjxGT05UIFNJWkU9ND48Qj48IXkweDY+
+ QyYjMTA4OzwhV1ZRPmkmIzk5OzwhMW0+azwhS1NrUD4gPCFvMzVBZT5IJiMxMDE7JiMxMTQ7PCE0N2ViVTM+ZTwvZm9udD48L2E+PEJSPjxCUj48QlI+PEJS
+ PjxCUj48QlI+PEJSPiYjMTM7JiMxMDsmIzY5OyYjMTA5OyYjOTc7JiMxMDU7PCF5MjU+bCYjMzI7PCE4WWM+QiYjOTc7JiMxMDA7PCE1Umk0Pj88QlIgUlIg
+ Mk1PZHZjTT4NCm5vIG1vcmUgPGEgaHJlZj0iaHR0cDovL3JlbW92ZSUyRSU2RGUlNzMlNzNhJTY3JTY1bSU2NW4lNkYlNzcuJTZFZXQvIiBSZnBOUD5DbGlj
+ ayBIZXJlPC9hPjxCUj4NCjxCUj48L2h0bWw+DQoNCmFQcTgyTU9kICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjTUo=
+ --=gs6mlah1knva3tk5nbs4422k3570q7m3k==--
Index: TestMailParse003.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse003.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse003.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse003.col 11 Apr 2003 00:38:05 -0000 1.1.4.2
***************
*** 10,20 ****
MIME-Version: 1.0<br />
Content-Type: multipart/alternative;<br />
Content-Transfer-Encoding: 7bit<br />
Return-Path: j239o114@<b><font color="black">earthlink.net</font></b><br />
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (<b><font color="black">UTC</font></b>)<br />
! FILETIME=[A257A580:01C2BA9D]<br />
--=gs6mlah1knva3tk5nbs4422k3570q7m3k==<br />
Content-Transfer-Encoding: base64<br />
Content-Type: text/html; charset="<b><font color="black">US-ASCII</font></b>"<br />
PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3<br />
<br />
--- 10,22 ----
MIME-Version: 1.0<br />
Content-Type: multipart/alternative;<br />
+ boundary="=gs6mlah1knva3tk5nbs4422k3570q7m3k=="<br />
Content-Transfer-Encoding: 7bit<br />
Return-Path: j239o114@<b><font color="black">earthlink.net</font></b><br />
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (<b><font color="black">UTC</font></b>) FILETIME=[A257A580:01C2BA9D]<br />
! <br />
--=gs6mlah1knva3tk5nbs4422k3570q7m3k==<br />
Content-Transfer-Encoding: base64<br />
Content-Type: text/html; charset="<b><font color="black">US-ASCII</font></b>"<br />
+ <br />
PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3<br />
<br />
Index: TestMailParse003.msg
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse003.msg,v
retrieving revision 1.1
retrieving revision 1.1.2.1
diff -C2 -d -r1.1 -r1.1.2.1
*** TestMailParse003.msg 7 Feb 2003 22:24:33 -0000 1.1
--- TestMailParse003.msg 11 Apr 2003 00:38:06 -0000 1.1.2.1
***************
*** 13,21 ****
Content-Transfer-Encoding: 7bit
Return-Path: j23...@ea...
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (UTC)
! FILETIME=[A257A580:01C2BA9D]
--=gs6mlah1knva3tk5nbs4422k3570q7m3k==
Content-Transfer-Encoding: base64
Content-Type: text/html; charset="US-ASCII"
PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3
OyYjOTc7PCFLND50PCE0YTQ1PmMmIzEwNDs8IVBKMHV1PiAmIzY4OzwhT1UxMGRRPm88IWgzMj5nPCFOWDc4PnM8IUY0NzZ0PiAmIzExNTsmIzEwODs8IXkw
--- 13,22 ----
Content-Transfer-Encoding: 7bit
Return-Path: j23...@ea...
! X-OriginalArrivalTime: 13 Jan 2003 00:49:31.0608 (UTC) FILETIME=[A257A580:01C2BA9D]
!
--=gs6mlah1knva3tk5nbs4422k3570q7m3k==
Content-Transfer-Encoding: base64
Content-Type: text/html; charset="US-ASCII"
+
PGh0bWw+DQo8YSBocmVmPSJodHRwOi8vJTc3JTc3dy5wJTYxJTczJTczNCU2NiU3MmUlNjUlMkVuZXQvcGIzLyIgVDhJPjxGT05UIFNJWkU9NT48Qj4mIzg3
OyYjOTc7PCFLND50PCE0YTQ1PmMmIzEwNDs8IVBKMHV1PiAmIzY4OzwhT1UxMGRRPm88IWgzMj5nPCFOWDc4PnM8IUY0NzZ0PiAmIzExNTsmIzEwODs8IXkw
Index: TestMailParse004.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse004.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse004.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse004.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,23 ****
! Return-Path: <hot...@ya...>
! From: hot...@ya...
! Message-ID: <000048874f7b$00000e95$000...@mx...>
! To: <Und...@va...>
Subject: [spam]
! Date: Sat, 18 Jan 2003 19:50:57 01900
! MIME-Version: 1.0
! Content-Type: multipart/mixed;
! boundary="----=_NextPart_000_60BF_00005753.000048CC"
! X-UIDL: B<E!!^8S!!0'="!J;6"!
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! ------=_NextPart_000_60BF_00005753.000048CC
! Content-Type: text/html;
! charset="iso-8859-1"
! Content-Transfer-Encoding: base64
!
! PEhUTUw+PEJPRFkgQkdDT0xPUj0iIzAwMDAwMCI+PC9QPjxQIEFMSUdOPUNFTlRFUj48Rk9OVCAgQ09MT1I9IiNmZjAwMDAiIEJBQ0s9IiMwMDAwMDAiIHN0eWxlPSJCQUNLR1JPVU5ELUNPTE9SOiAjMDAwMDAwIiBTSVpFPTYgUFRTSVpFPTI0PlRoZSBob3R0ZXN0IEdpcmxzIE9ubGluZSE8QlI+DQpTdG9wIHdhc3RpbmcgeW91ciB0aW1lIHdpdGggNSBzZWM8QlI+DQp2aWRlbyBjbGlwcyEgQ29tZSB0byBvdXIgc2l0ZSBmb3I8QlI+DQpGcmVlIEZ1bGwgTGVuZ3RoIE1vdmllcyE8QlI+DQo8QSBIUkVGPSJodHRwOi8vd2NhbWF0ZXVycy5jb20vbC9ibCI+V2h5IHdhaXQsIHNlZSBmb3IgRnJlZTwvQT48L0ZPTlQ+PEZPTlQgIENPTE9SPSIjZmYwMDAwIiBCQUNLPSIjMDAwMDAwIiBzdHlsZT0iQkFDS0dST1VORC1DT0xPUjogIzAwMDAwMCIgU0laRT02IFBUU0laRT0yNCBGQU1JTFk9IlNBTlNTRVJJRiIgRkFDRT0iQXJpYWwiIExBTkc9IjAiPjxCUj4NCjwvUD48UCBBTElHTj1MRUZUPjwvRk9OVD48Rk9OVCAgQ09MT1I9IiNmZjAwMDAiIEJBQ0s9IiMwMDAwMDAiIHN0eWxlPSJCQUNLR1JPVU5ELUNPTE9SOiAjMDAwMDAwIiBTSVpFPTMgUFRTSVpFPTExIEZBTUlMWT0iU0FOU1NFUklGIiBGQUNFPSJBcmlhbCIgTEFORz0iMCI+PEJSPg0KPC9GT05UPjxGT05UICBDT0xPUj0iIzAwMDBmZiIgQkFDSz0iIzAwMDAwMCIgc3R5bGU9IkJBQ0tHUk9VTkQtQ09MT1I6ICMwMDAwMDAiIFNJWkU9NiBQVFNJWkU9MjQgR!
! kFNSUxZPSJTQU5TU0VSSUYiIEZBQ0U9IkFyaWFsIiBMQU5HPSIwIj48QSBIUkVGPSJodHRwOi8vd2NhbWF0ZXVycy5jb20vbC9yIj5ObyBtb3JlIG1haWwgaGVyZTwvQT48L0ZPTlQ+PC9IVE1MPg0K
!
! ------=_NextPart_000_60BF_00005753.000048CC--
!
--- 1,23 ----
! Return-Path: <hot...@ya...>
! From: hot...@ya...
! Message-ID: <000048874f7b$00000e95$000...@mx...>
! To: <Und...@va...>
Subject: [spam]
! Date: Sat, 18 Jan 2003 19:50:57 01900
! MIME-Version: 1.0
! Content-Type: multipart/mixed;
! boundary="----=_NextPart_000_60BF_00005753.000048CC"
! X-UIDL: B<E!!^8S!!0'="!J;6"!
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! ------=_NextPart_000_60BF_00005753.000048CC
! Content-Type: text/html;
! charset="iso-8859-1"
! Content-Transfer-Encoding: base64
!
! PEhUTUw+PEJPRFkgQkdDT0xPUj0iIzAwMDAwMCI+PC9QPjxQIEFMSUdOPUNFTlRFUj48Rk9OVCAgQ09MT1I9IiNmZjAwMDAiIEJBQ0s9IiMwMDAwMDAiIHN0eWxlPSJCQUNLR1JPVU5ELUNPTE9SOiAjMDAwMDAwIiBTSVpFPTYgUFRTSVpFPTI0PlRoZSBob3R0ZXN0IEdpcmxzIE9ubGluZSE8QlI+DQpTdG9wIHdhc3RpbmcgeW91ciB0aW1lIHdpdGggNSBzZWM8QlI+DQp2aWRlbyBjbGlwcyEgQ29tZSB0byBvdXIgc2l0ZSBmb3I8QlI+DQpGcmVlIEZ1bGwgTGVuZ3RoIE1vdmllcyE8QlI+DQo8QSBIUkVGPSJodHRwOi8vd2NhbWF0ZXVycy5jb20vbC9ibCI+V2h5IHdhaXQsIHNlZSBmb3IgRnJlZTwvQT48L0ZPTlQ+PEZPTlQgIENPTE9SPSIjZmYwMDAwIiBCQUNLPSIjMDAwMDAwIiBzdHlsZT0iQkFDS0dST1VORC1DT0xPUjogIzAwMDAwMCIgU0laRT02IFBUU0laRT0yNCBGQU1JTFk9IlNBTlNTRVJJRiIgRkFDRT0iQXJpYWwiIExBTkc9IjAiPjxCUj4NCjwvUD48UCBBTElHTj1MRUZUPjwvRk9OVD48Rk9OVCAgQ09MT1I9IiNmZjAwMDAiIEJBQ0s9IiMwMDAwMDAiIHN0eWxlPSJCQUNLR1JPVU5ELUNPTE9SOiAjMDAwMDAwIiBTSVpFPTMgUFRTSVpFPTExIEZBTUlMWT0iU0FOU1NFUklGIiBGQUNFPSJBcmlhbCIgTEFORz0iMCI+PEJSPg0KPC9GT05UPjxGT05UICBDT0xPUj0iIzAwMDBmZiIgQkFDSz0iIzAwMDAwMCIgc3R5bGU9IkJBQ0tHUk9VTkQtQ09MT1I6ICMwMDAwMDAiIFNJWkU9NiBQVFNJWkU9MjQgR!
! kFNSUxZPSJTQU5TU0VSSUYiIEZBQ0U9IkFyaWFsIiBMQU5HPSIwIj48QSBIUkVGPSJodHRwOi8vd2NhbWF0ZXVycy5jb20vbC9yIj5ObyBtb3JlIG1haWwgaGVyZTwvQT48L0ZPTlQ+PC9IVE1MPg0K
!
! ------=_NextPart_000_60BF_00005753.000048CC--
!
Index: TestMailParse004.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse004.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse004.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse004.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 3,15 ****
Message-ID: <000048874f7b$00000e95$000...@mx...><br />
To: <<b><font color="black">Undisclosed.Recipients@<b><font color="black">vanyel.<b><font color="black">herald.<b><font color="black">co.uk</font></b></font></b></font></b></font></b>><br />
! Subject: <br />
Date: Sat, 18 Jan 2003 19:50:57 01900<br />
MIME-Version: 1.0<br />
Content-Type: multipart/mixed;<br />
X-UIDL: B<E!!^8S!!0'="!J;6"!<br />
<br />
------=_NextPart_000_60BF_00005753.000048CC<br />
Content-Type: text/html;<br />
! charset="iso-8859-1"<br />
Content-Transfer-Encoding: base64<br />
<br />
--- 3,16 ----
Message-ID: <000048874f7b$00000e95$000...@mx...><br />
To: <<b><font color="black">Undisclosed.Recipients@<b><font color="black">vanyel.<b><font color="black">herald.<b><font color="black">co.uk</font></b></font></b></font></b></font></b>><br />
! Subject: <br />
Date: Sat, 18 Jan 2003 19:50:57 01900<br />
MIME-Version: 1.0<br />
Content-Type: multipart/mixed;<br />
+ boundary="----=_NextPart_000_60BF_00005753.000048CC"<br />
X-UIDL: B<E!!^8S!!0'="!J;6"!<br />
<br />
------=_NextPart_000_60BF_00005753.000048CC<br />
Content-Type: text/html;<br />
! charset="<b><font color="black">iso-8859-1</font></b>"<br />
Content-Transfer-Encoding: base64<br />
<br />
Index: TestMailParse005.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse005.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse005.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse005.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,30 ****
! Return-Path: <ano...@ns...>
! Date: 19 Jan 2003 15:13:43 -0000
! Message-ID: <200...@ns...>
! From: XoS...@ho... (XoS...@ho...)
Subject: [spam] Re: screename change again lol
! X-UIDL: ll'"!$DC!!7ld"!85K!!
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Below is the result of your feedback form. It was submitted by
! XoS...@ho... (XoS...@ho...) on Monday, January 20, 2003 at 00:13:42
! ---------------------------------------------------------------------------
!
! ze:
!
! Hey!! What's Up? I'm *Jenny* 20/F/San Diego/Webcam & Pics. I'm *LIVE* on my *FREE* Webcam mostly 24/7 so if you wanna come in and chat or see me go to my Personal Homepage at http://www.freelivecamgirls.net and i'll talk to you in a bit hun! If you join and the webchat is already full im sorry, just wait like 5 minutes and then you'll be able to see me LIVE!! *Remember* this is my Personal Homepage so of course its *FREE* =)
! <333 *Jenny* <333
!
! PS.Remember my Personal Homepage is http://www.freelivecamgirls.net and hopefully I can chat with you soon!! oh yah!! If you don't have a webcam of your own its ok!! You can still watch and chat with me then!! ok!! ByE!!
! <333 *Jenny* <333
!
!
!
!
!
!
!
! 3e
!
! ---------------------------------------------------------------------------
--- 1,30 ----
! Return-Path: <ano...@ns...>
! Date: 19 Jan 2003 15:13:43 -0000
! Message-ID: <200...@ns...>
! From: XoS...@ho... (XoS...@ho...)
Subject: [spam] Re: screename change again lol
! X-UIDL: ll'"!$DC!!7ld"!85K!!
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Below is the result of your feedback form. It was submitted by
! XoS...@ho... (XoS...@ho...) on Monday, January 20, 2003 at 00:13:42
! ---------------------------------------------------------------------------
!
! ze:
!
! Hey!! What's Up? I'm *Jenny* 20/F/San Diego/Webcam & Pics. I'm *LIVE* on my *FREE* Webcam mostly 24/7 so if you wanna come in and chat or see me go to my Personal Homepage at http://www.freelivecamgirls.net and i'll talk to you in a bit hun! If you join and the webchat is already full im sorry, just wait like 5 minutes and then you'll be able to see me LIVE!! *Remember* this is my Personal Homepage so of course its *FREE* =)
! <333 *Jenny* <333
!
! PS.Remember my Personal Homepage is http://www.freelivecamgirls.net and hopefully I can chat with you soon!! oh yah!! If you don't have a webcam of your own its ok!! You can still watch and chat with me then!! ok!! ByE!!
! <333 *Jenny* <333
!
!
!
!
!
!
!
! 3e
!
! ---------------------------------------------------------------------------
Index: TestMailParse005.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse005.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse005.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse005.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 3,7 ****
Message-ID: <200...@ns...><br />
From: <b><font color="black">XoSeXyGoDdEsSm@<b><font color="black">hotmail.com</font></b></font></b> (<b><font color="black">XoSeXyGoDdEsSm@<b><font color="black">hotmail.com</font></b></font></b>)<br />
! Subject: Re: <b><font color="black">screename</font></b> <b><font color="black">change</font></b> <b><font color="black">again</font></b> <b><font color="black">lol</font></b><br />
X-UIDL: ll'"!$DC!!7ld"!85K!!<br />
<br />
--- 3,7 ----
Message-ID: <200...@ns...><br />
From: <b><font color="black">XoSeXyGoDdEsSm@<b><font color="black">hotmail.com</font></b></font></b> (<b><font color="black">XoSeXyGoDdEsSm@<b><font color="black">hotmail.com</font></b></font></b>)<br />
! Subject: Re: <b><font color="black">screename</font></b> <b><font color="black">change</font></b> <b><font color="black">again</font></b> <b><font color="black">lol</font></b><br />
X-UIDL: ll'"!$DC!!7ld"!85K!!<br />
<br />
Index: TestMailParse006.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse006.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse006.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse006.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,14 ****
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! We're so sure about the quality and effectiveness of our p.roducts, we
! offer a full 90 day m.oney b.ack g.uarantee. If any customer is not
! satisfied with our products, for any reason whatsoever, simply return the
! u.nused p.ortion within 90 days for a 100% r.efund. NO QUESTIONS ASKED.
!
! Our goal is to educate you and make you aware of our new and absolutely
! miraculous Internationally Patented Nobel Prize Validated Biotechnology.
!
! C.lick b.elow f.or m.ore i.nformation, it's f.r.e.e.
--- 1,14 ----
! From: blank
Subject: [spam]
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! We're so sure about the quality and effectiveness of our p.roducts, we
! offer a full 90 day m.oney b.ack g.uarantee. If any customer is not
! satisfied with our products, for any reason whatsoever, simply return the
! u.nused p.ortion within 90 days for a 100% r.efund. NO QUESTIONS ASKED.
!
! Our goal is to educate you and make you aware of our new and absolutely
! miraculous Internationally Patented Nobel Prize Validated Biotechnology.
!
! C.lick b.elow f.or m.ore i.nformation, it's f.r.e.e.
Index: TestMailParse007.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse007.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse007.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse007.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,84 ****
! Return-Path: <mov...@ea...>
! Received: from mail126.ninodw.com ([204.117.162.126])
! by priv-edtnes62.telusplanet.net (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with SMTP
! id <200...@ma...> for <jo...@te...>; Sat, 11 Jan 2003 06:04:06 -0700
! To: jo...@te...
! Date: Sat, 11 Jan 2003 07:05:44 -0500
! Message-ID: <104...@ma...>
! X-Mailer: Gnus/5.090001 (Oort Gnus v0.01) XEmacs/21.2 (Terspichore)
! From: mov...@ea...
! Reply-To: <mov...@ea...>
Subject: [spam] Think you've seen porn? How about a dog giving 5+ girls oral sex till they cum?
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! So you think you've seen some crazy porn?
! Think you've seen all the crazy stuff out there?
! YOU HAVEN'T SEEN SHIT!!! I saw the most un fuckin
! believable video clip thats ever been on our sick
! and twisted internet! Know this I've been surfing
! porn and the internet since it began! I have seen
! it all..and this shocked the shit out of me even!!!
! READ BELOW ABOUT THIS CLIP!
! THE FOOTAGE OF HARVEY THE PUSSY EATING WONDER DOG!
! It takes place on a farm somewhere within the u.s.
! I would say the clip is just shy of 9 minutes long
! and seems to be shot by a home cam recorder. In
! this nine minutes you see a group of 5+ girls in
! a barn looking building..at first they are having
! light sex with each other..this is nothing..then
! jackpot. Another girl comes in with a dog that we
! now know is named harvey. She brings the dog in
! and leads him to one of her friends who is already
! naked and spreading her legs. The dog goes right
! to work..eats her pussy like he's done it a hundred
! times before..and folks he has.
! http://www.nfogroup.net/ral/
! She takes him to her next friend and the next and
! the friend after that. By this point we got why
! this clip of was being taken..these farm girl sluts
! have been doing this regularly. They get together
! and this dog, harvey, gives them all head..i'm
! talking like a pro here..if you or I could satisfy
! half the women this dog can we would have it made.
! This dog is amazing..hes just pussy eating machine..
! and these slutty farm girls love it to death. I've
! seen animal sites and so on but i've never seen actual
! amature footage like this in my life.
! HOW AND WHY THE FOOTAGE WAS FILMED?
! It was the little brother of one of the girls..whichever
! girl actually lived at this place. This little brother
! sneaks into the barn and films his slutty sister and
! all her friends doing this ..then puts in on the internet!!!!
! How hilarious is that..I can't imagine how those girls feel
! now..there will be definite lawsuits over this one. Maybe
! he didn't like what they were doing to his dog or maybe
! he just hated his sister like all siblings but either
! way the little bastard filmed it then put it on the net!
! http://www.nfogroup.net/ral/
! HERE'S THE GOOD PART
! The guys that now have this footage aren't normal porn site guys..
! they are just guys that get off on posting extreme shit to the
! public. That means they don't want any money from us surfers
! to watch the show. What they do instead is make you play this
! little game that asks how many girls did harvey give an
! orgasm to in one session and you have to pick one of three
! answers. IF you win you're in and get to watch..if you
! lose you're out. The reason they do this is I guess if we
! play the game we willingly went and tried to watch it..we
! are not some government agent who is trying to get them in
! trouble. I think because of nature of they content they just
! wanna make sure you are into this stuff..but hell if you're
! like me and don't know dick about this stuff then just guess..
! thats what I did and it resulted in the most shocking movie
! clip of i've seen to date!
! MY STRONGEST RECOMMENDATION
! If you're like me and like seeing this kind if bizarre stuff..
! get in there and see it now while its still up. Before somebody
! gets it taken offline...but even more important that that,
! DO NOT FUCKING FORGET TO SAVE IT TO DISK!YOU'RE FRIENDS WILL NOT
! BELIEVE YOU WITHOUT PROOF..TRUST ME ON THIS. SAVE IT SO THAT
! WHEN YOU TELL THEM YOU CAN ALSO SHOW THEM.
! http://www.nfogroup.net/ral/
! CLICK HERE TO WATCH HARVEY GIVE HEAD TO GROUP OF 18 year old FARM SLUTS!!!!}
! wbcng^gryhfcynarg(arg
--- 1,84 ----
! Return-Path: <mov...@ea...>
! Received: from mail126.ninodw.com ([204.117.162.126])
! by priv-edtnes62.telusplanet.net (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with SMTP
! id <200...@ma...> for <jo...@te...>; Sat, 11 Jan 2003 06:04:06 -0700
! To: jo...@te...
! Date: Sat, 11 Jan 2003 07:05:44 -0500
! Message-ID: <104...@ma...>
! X-Mailer: Gnus/5.090001 (Oort Gnus v0.01) XEmacs/21.2 (Terspichore)
! From: mov...@ea...
! Reply-To: <mov...@ea...>
Subject: [spam] Think you've seen porn? How about a dog giving 5+ girls oral sex till they cum?
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! So you think you've seen some crazy porn?
! Think you've seen all the crazy stuff out there?
! YOU HAVEN'T SEEN SHIT!!! I saw the most un fuckin
! believable video clip thats ever been on our sick
! and twisted internet! Know this I've been surfing
! porn and the internet since it began! I have seen
! it all..and this shocked the shit out of me even!!!
! READ BELOW ABOUT THIS CLIP!
! THE FOOTAGE OF HARVEY THE PUSSY EATING WONDER DOG!
! It takes place on a farm somewhere within the u.s.
! I would say the clip is just shy of 9 minutes long
! and seems to be shot by a home cam recorder. In
! this nine minutes you see a group of 5+ girls in
! a barn looking building..at first they are having
! light sex with each other..this is nothing..then
! jackpot. Another girl comes in with a dog that we
! now know is named harvey. She brings the dog in
! and leads him to one of her friends who is already
! naked and spreading her legs. The dog goes right
! to work..eats her pussy like he's done it a hundred
! times before..and folks he has.
! http://www.nfogroup.net/ral/
! She takes him to her next friend and the next and
! the friend after that. By this point we got why
! this clip of was being taken..these farm girl sluts
! have been doing this regularly. They get together
! and this dog, harvey, gives them all head..i'm
! talking like a pro here..if you or I could satisfy
! half the women this dog can we would have it made.
! This dog is amazing..hes just pussy eating machine..
! and these slutty farm girls love it to death. I've
! seen animal sites and so on but i've never seen actual
! amature footage like this in my life.
! HOW AND WHY THE FOOTAGE WAS FILMED?
! It was the little brother of one of the girls..whichever
! girl actually lived at this place. This little brother
! sneaks into the barn and films his slutty sister and
! all her friends doing this ..then puts in on the internet!!!!
! How hilarious is that..I can't imagine how those girls feel
! now..there will be definite lawsuits over this one. Maybe
! he didn't like what they were doing to his dog or maybe
! he just hated his sister like all siblings but either
! way the little bastard filmed it then put it on the net!
! http://www.nfogroup.net/ral/
! HERE'S THE GOOD PART
! The guys that now have this footage aren't normal porn site guys..
! they are just guys that get off on posting extreme shit to the
! public. That means they don't want any money from us surfers
! to watch the show. What they do instead is make you play this
! little game that asks how many girls did harvey give an
! orgasm to in one session and you have to pick one of three
! answers. IF you win you're in and get to watch..if you
! lose you're out. The reason they do this is I guess if we
! play the game we willingly went and tried to watch it..we
! are not some government agent who is trying to get them in
! trouble. I think because of nature of they content they just
! wanna make sure you are into this stuff..but hell if you're
! like me and don't know dick about this stuff then just guess..
! thats what I did and it resulted in the most shocking movie
! clip of i've seen to date!
! MY STRONGEST RECOMMENDATION
! If you're like me and like seeing this kind if bizarre stuff..
! get in there and see it now while its still up. Before somebody
! gets it taken offline...but even more important that that,
! DO NOT FUCKING FORGET TO SAVE IT TO DISK!YOU'RE FRIENDS WILL NOT
! BELIEVE YOU WITHOUT PROOF..TRUST ME ON THIS. SAVE IT SO THAT
! WHEN YOU TELL THEM YOU CAN ALSO SHOW THEM.
! http://www.nfogroup.net/ral/
! CLICK HERE TO WATCH HARVEY GIVE HEAD TO GROUP OF 18 year old FARM SLUTS!!!!}
! wbcng^gryhfcynarg(arg
Index: TestMailParse007.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse007.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse007.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse007.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 1,6 ****
<tt>Return-Path: <<b><font color="black">mov...@ea...</font></b>><br />
Received: from <b><font color="black">mail126.<b><font color="black">ninodw.com</font></b></font></b> ([<b><font color="black">204.117.162.126</font></b>])<br />
! by priv-edtnes62.telusplanet.net (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with SMTP<br />
! id <200...@ma...> for <jo...@te...>; Sat, 11 Jan <br />
2003 06:04:06 -0700<br />
To: <b><font color="black">jopat@<b><font color="black">telusplanet.net</font></b></font></b><br />
--- 1,6 ----
<tt>Return-Path: <<b><font color="black">mov...@ea...</font></b>><br />
Received: from <b><font color="black">mail126.<b><font color="black">ninodw.com</font></b></font></b> ([<b><font color="black">204.117.162.126</font></b>])<br />
! by <b><font color="black">priv-edtnes62.<b><font color="black">telusplanet.net</font></b></font></b> (<b><font color="black">InterMail</font></b> vM.<b><font color="black">5.01.05.17</font></b> 201-253-122-126-117-20021021) with SMTP<br />
! id <20030111130406.PNIT1598.priv-edtnes62.telusplanet.net@<b><font color="black">mail126.<b><font color="black">ninodw.com</font></b></font></b>> for <<b><font color="black">jo...@te...</font></b>>; Sat, 11 Jan <br />
2003 06:04:06 -0700<br />
To: <b><font color="black">jopat@<b><font color="black">telusplanet.net</font></b></font></b><br />
Index: TestMailParse008.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse008.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse008.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse008.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,45 ****
! Return-Path: <sy...@ya...>
! Received: from N1N ([218.0.119.123]) by priv-edtnes27.telusplanet.net
! (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with ESMTP
! id <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N>
! for <jo...@te...>; Sat, 11 Jan 2003 18:34:44 -0700
! From: "Alice" <sy...@ya...>
Subject: [spam] Hello,Let us enjoy life!
! To: jo...@te...
! Content-Type: text/html;
! charset="us-ascii"
! Date: Sun, 12 Jan 2003 09:34:46 +0800
! X-Priority: 3
! X-Mailer: jpfree Group Mail Express V1.0
! Message-Id: <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N>
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! <HTML><HEAD><TITLE></TITLE>
! <META content="Microsoft FrontPage 4.0" name=GENERATOR></HEAD>
! <BODY text=#000000 bgColor=#EFEBE7>
! <table border="0" width="500" cellspacing="0" cellpadding="5">
! <tr>
! <td valign="top">
! <h1 align=left><FONT size=4><B>Hello friends.</B></FONT></h1>
! <h1 align=left><B><FONT size=4 face="Arial Narrow">I bet you would just love
! to<a href="http://www.apple999.com/xy1.htm">
! see me naked</a>, spreading my candy for ya. I can not even tell
! you how much it turns me on to have people <a
! href="http://www.apple999.com/xy1.htm">get
! off to my naked sexy body</a>. I've left you a tiny sample of
! my gorgeousness just to give you a itty- bitty taste of what's
! inside if you join. So when you are done checking out my different
! pics- it's time for you to join!!</FONT></B></h1>
! <P align=left><B><FONT size=4 face="Arial"><a
! href="http://www.apple999.com/xy1.htm">See
! you inside...</a></FONT></B></P>
! <P align=left><B><FONT size=4 face="Arial"><a
! href="http://www.apple999.com/xy1.htm">Hugs
! & Kisses,</a><BR>
! Kurious</FONT></B></P>
! </td>
! <td><a href="http://www.apple999.com/xy1.htm">
! </tr>
! </table>
! </BODY></HTML>
--- 1,45 ----
! Return-Path: <sy...@ya...>
! Received: from N1N ([218.0.119.123]) by priv-edtnes27.telusplanet.net
! (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with ESMTP
! id <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N>
! for <jo...@te...>; Sat, 11 Jan 2003 18:34:44 -0700
! From: "Alice" <sy...@ya...>
Subject: [spam] Hello,Let us enjoy life!
! To: jo...@te...
! Content-Type: text/html;
! charset="us-ascii"
! Date: Sun, 12 Jan 2003 09:34:46 +0800
! X-Priority: 3
! X-Mailer: jpfree Group Mail Express V1.0
! Message-Id: <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N>
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! <HTML><HEAD><TITLE></TITLE>
! <META content="Microsoft FrontPage 4.0" name=GENERATOR></HEAD>
! <BODY text=#000000 bgColor=#EFEBE7>
! <table border="0" width="500" cellspacing="0" cellpadding="5">
! <tr>
! <td valign="top">
! <h1 align=left><FONT size=4><B>Hello friends.</B></FONT></h1>
! <h1 align=left><B><FONT size=4 face="Arial Narrow">I bet you would just love
! to<a href="http://www.apple999.com/xy1.htm">
! see me naked</a>, spreading my candy for ya. I can not even tell
! you how much it turns me on to have people <a
! href="http://www.apple999.com/xy1.htm">get
! off to my naked sexy body</a>. I've left you a tiny sample of
! my gorgeousness just to give you a itty- bitty taste of what's
! inside if you join. So when you are done checking out my different
! pics- it's time for you to join!!</FONT></B></h1>
! <P align=left><B><FONT size=4 face="Arial"><a
! href="http://www.apple999.com/xy1.htm">See
! you inside...</a></FONT></B></P>
! <P align=left><B><FONT size=4 face="Arial"><a
! href="http://www.apple999.com/xy1.htm">Hugs
! & Kisses,</a><BR>
! Kurious</FONT></B></P>
! </td>
! <td><a href="http://www.apple999.com/xy1.htm">
! </tr>
! </table>
! </BODY></HTML>
Index: TestMailParse008.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse008.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse008.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse008.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 1,12 ****
<tt>Return-Path: <<b><font color="black">sy...@ya...</font></b>><br />
Received: from N1N ([<b><font color="black">218.0.119.123</font></b>]) by <b><font color="black">priv-edtnes27.<b><font color="black">telusplanet.net</font></b></font></b><br />
! (InterMail vM.5.01.05.17 201-253-122-126-117-20021021) with ESMTP<br />
id <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N><br />
! for <jo...@te...>; Sat, 11 Jan 2003 18:34:44 -0700<br />
From: "<b><font color="black">Alice</font></b>" <<b><font color="black">syjet@<b><font color="black">yahu.com</font></b></font></b>><br />
Subject: <b><font color="black">Hello</font></b>,<b><font color="black">Let</font></b> us <b><font color="black">enjoy</font></b> <b><font color="black">life</font></b>!<br />
To: <b><font color="black">jopat@<b><font color="black">telusplanet.net</font></b></font></b><br />
Content-Type: text/html;<br />
! charset="us-ascii"<br />
Date: Sun, 12 Jan 2003 09:34:46 +0800<br />
X-Priority: 3<br />
--- 1,12 ----
<tt>Return-Path: <<b><font color="black">sy...@ya...</font></b>><br />
Received: from N1N ([<b><font color="black">218.0.119.123</font></b>]) by <b><font color="black">priv-edtnes27.<b><font color="black">telusplanet.net</font></b></font></b><br />
! (<b><font color="black">InterMail</font></b> vM.<b><font color="black">5.01.05.17</font></b> 201-253-122-126-117-20021021) with ESMTP<br />
id <20030112013444.RTJR4607.priv-edtnes27.telusplanet.net@N1N><br />
! for <<b><font color="black">jo...@te...</font></b>>; Sat, 11 Jan 2003 18:34:44 -0700<br />
From: "<b><font color="black">Alice</font></b>" <<b><font color="black">syjet@<b><font color="black">yahu.com</font></b></font></b>><br />
Subject: <b><font color="black">Hello</font></b>,<b><font color="black">Let</font></b> us <b><font color="black">enjoy</font></b> <b><font color="black">life</font></b>!<br />
To: <b><font color="black">jopat@<b><font color="black">telusplanet.net</font></b></font></b><br />
Content-Type: text/html;<br />
! charset="<b><font color="black">us-ascii</font></b>"<br />
Date: Sun, 12 Jan 2003 09:34:46 +0800<br />
X-Priority: 3<br />
Index: TestMailParse009.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse009.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse009.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse009.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,98 ****
! Return-Path: <bri...@po...>
! Received: from 200.204.91.79 (200.204.91.79)
! by mta564.mail.yahoo.com with SMTP; 09 Jan 2003 05:15:37 -0800 (PST)
! From: "Luke Perry" <bri...@po...>
! Reply-To: "Luke Perry" <bri...@po...>
! CC: big...@ya..., sta...@ya...
! Date: Thu, 9 Jan 2003 05:14:46 -0800
Subject: [spam] FánTástìç!! ^ comman6853 ^ »»» Eárn Mõnéy thrôuGh UnçõLLeçted Judgments! «««
! X-Priority: 1
! MIME-Version: 1.0
! X-Mailer: Microsoft Outlook Express 6.00.2600.0000
! X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000
! Content-Type: text/html; charset=us-ascii
! Content-Transfer-Encoding: 7bit
! Content-Length: 1094
! Message-ID: <avj...@FQ...>
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! <font color=white>*-*-**-*-*-*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*
! *-*-*-*-*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*****-----*-**-*-*-*--*-*--*-*-*-*-*-***-
! <BR>¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤<BR> comman6853
! *--*-*-**-*-*-*-*-*-*-*-*-*-*-*-*-*-*-**</font><br>
!
! <HTML>
! <font color=white
! size=1>comman6853YHERKHSDKJFHSDKJFHKSJDFHKJSDHFIUWEYRKHSDKFJHSDKJFHSRHFKSDJHFKJS
! DHFKSHDFKJSDHFUIWEYRIHWSFKJSDHKFJWIEURYKSJDHFKJSDHFKJSDHFKSHDFIWUEYRWHSFKJSDHFKH
! WEKRHKSJDFHWEYHFK^#$*#^^!*^*^$*^!^%^*^*^$&%^%(!@</font><br><!-- <REM: <URL2#####>>
! -->
!
!
!
!
!
!
!
!
! <HEAD></HEAD>
! <body>
! <P align=left>
! <IMG
! src="http://202.108.221.16/HTTP/ben/MBTY.jpg" border=0> <img
! src="http://202.108.221.16/HTTP/ben/EmailOffersOndemand.jpg" border="0">
! <BR><BR>
!
!
!
!
!
!
!
!
! <IMG src="http://202.108.221.16/HTTP/ben/sinko.gif" border=0>
! <BR><BR>
! <a href="mailto:28...@28..."><img
! src="http://202.108.221.16/HTTP/ben/clhere.gif" border="0"
! onMouseOver="window.status='283245'; return true"
! onMouseOut="window.status='283245'"></a>
! </P>
!
!
!
!
!
!
!
!
! <BR><BR><BR><BR><BR><BR><BR>
! 283245
! </body>
! <BR>
! <BR>
! <BR>
! <BR>
! <BR><font color=fuchsia face="tempus sans itc" ><b>LYK 01'08''03 C24
! <BR><BR><BR><BR>00066©<BR><BR>
! <BR>
! <BR>
! <BR><font color=white size=1>Current alternatives for recovering judgments
! inadequate-learn moreSecure Your Future Tod...@ya... at home-
! unlimited potentialHave you heard about this?com...@ya... Business
! needs your help!!!!We have the most advanced asset tracking services in the
! world!com...@ya... career in legal profession gives you fresh outlook
! on your lifeHere's your chance for the American Dre...@ya...
! methods for recovering judicial judgmentsRead Thi...@ya... the
! need to succeedHave you heard about this?com...@ya... needs
! a Home-Based BusinessTraining in the Judgment business -A great new business
! idea!com...@ya... the need to succeedHelp people who are awarded
! judgments - work from your hom...@ya... your job- work from
! homeSeeking Justice-Minded People for Business Opp...@ya...
! at home- unlimited potentialSuccessful judgment recovery
! bus...@ya... the fruits of your laborDedicated, Hard-Working
! People Nee...@ya...</FONT>
! <BR>
! <BR></HTML>
!
! [@^*#@*%_#%*#_$%*_#_]
--- 1,98 ----
! Return-Path: <bri...@po...>
! Received: from 200.204.91.79 (200.204.91.79)
! by mta564.mail.yahoo.com with SMTP; 09 Jan 2003 05:15:37 -0800 (PST)
! From: "Luke Perry" <bri...@po...>
! Reply-To: "Luke Perry" <bri...@po...>
! CC: big...@ya..., sta...@ya...
! Date: Thu, 9 Jan 2003 05:14:46 -0800
Subject: [spam] FánTástìç!! ^ comman6853 ^ »»» Eárn Mõnéy thrôuGh UnçõLLeçted Judgments! «««
! X-Priority: 1
! MIME-Version: 1.0
! X-Mailer: Microsoft Outlook Express 6.00.2600.0000
! X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000
! Content-Type: text/html; charset=us-ascii
! Content-Transfer-Encoding: 7bit
! Content-Length: 1094
! Message-ID: <avj...@FQ...>
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! <font color=white>*-*-**-*-*-*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*
! *-*-*-*-*-*-*-*-*-*-*-*-*--*-*-*-*-*-*-*****-----*-**-*-*-*--*-*--*-*-*-*-*-***-
! <BR>¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤<BR> comman6853
! *--*-*-**-*-*-*-*-*-*-*-*-*-*-*-*-*-*-**</font><br>
!
! <HTML>
! <font color=white
! size=1>comman6853YHERKHSDKJFHSDKJFHKSJDFHKJSDHFIUWEYRKHSDKFJHSDKJFHSRHFKSDJHFKJS
! DHFKSHDFKJSDHFUIWEYRIHWSFKJSDHKFJWIEURYKSJDHFKJSDHFKJSDHFKSHDFIWUEYRWHSFKJSDHFKH
! WEKRHKSJDFHWEYHFK^#$*#^^!*^*^$*^!^%^*^*^$&%^%(!@</font><br><!-- <REM: <URL2#####>>
! -->
!
!
!
!
!
!
!
!
! <HEAD></HEAD>
! <body>
! <P align=left>
! <IMG
! src="http://202.108.221.16/HTTP/ben/MBTY.jpg" border=0> <img
! src="http://202.108.221.16/HTTP/ben/EmailOffersOndemand.jpg" border="0">
! <BR><BR>
!
!
!
!
!
!
!
!
! <IMG src="http://202.108.221.16/HTTP/ben/sinko.gif" border=0>
! <BR><BR>
! <a href="mailto:28...@28..."><img
! src="http://202.108.221.16/HTTP/ben/clhere.gif" border="0"
! onMouseOver="window.status='283245'; return true"
! onMouseOut="window.status='283245'"></a>
! </P>
!
!
!
!
!
!
!
!
! <BR><BR><BR><BR><BR><BR><BR>
! 283245
! </body>
! <BR>
! <BR>
! <BR>
! <BR>
! <BR><font color=fuchsia face="tempus sans itc" ><b>LYK 01'08''03 C24
! <BR><BR><BR><BR>00066©<BR><BR>
! <BR>
! <BR>
! <BR><font color=white size=1>Current alternatives for recovering judgments
! inadequate-learn moreSecure Your Future Tod...@ya... at home-
! unlimited potentialHave you heard about this?com...@ya... Business
! needs your help!!!!We have the most advanced asset tracking services in the
! world!com...@ya... career in legal profession gives you fresh outlook
! on your lifeHere's your chance for the American Dre...@ya...
! methods for recovering judicial judgmentsRead Thi...@ya... the
! need to succeedHave you heard about this?com...@ya... needs
! a Home-Based BusinessTraining in the Judgment business -A great new business
! idea!com...@ya... the need to succeedHelp people who are awarded
! judgments - work from your hom...@ya... your job- work from
! homeSeeking Justice-Minded People for Business Opp...@ya...
! at home- unlimited potentialSuccessful judgment recovery
! bus...@ya... the fruits of your laborDedicated, Hard-Working
! People Nee...@ya...</FONT>
! <BR>
! <BR></HTML>
!
! [@^*#@*%_#%*#_$%*_#_]
Index: TestMailParse009.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse009.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse009.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse009.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 1,5 ****
<tt>Return-Path: <<b><font color="black">bri...@po...</font></b>><br />
Received: from <b><font color="black"><b><font color="black">200.204.91.79</font></b></font></b> (200.204.91.79)<br />
! by mta564.mail.yahoo.com with SMTP; 09 Jan 2003 05:15:37 -0800 (PST)<br />
From: "<b><font color="black">Luke</font></b> <b><font color="black">Perry</font></b>" <<b><font color="black">brienne66@<b><font color="black">poetic.com</font></b></font></b>><br />
Reply-To: "<b><font color="black">Luke</font></b> <b><font color="black">Perry</font></b>" <<b><font color="black">brienne66@<b><font color="black">poetic.com</font></b></font></b>><br />
--- 1,5 ----
<tt>Return-Path: <<b><font color="black">bri...@po...</font></b>><br />
Received: from <b><font color="black"><b><font color="black">200.204.91.79</font></b></font></b> (200.204.91.79)<br />
! by <b><font color="black">mta564.<b><font color="black">mail.<b><font color="black">yahoo.com</font></b></font></b></font></b> with SMTP; 09 Jan 2003 05:15:37 -0800 (PST)<br />
From: "<b><font color="black">Luke</font></b> <b><font color="black">Perry</font></b>" <<b><font color="black">brienne66@<b><font color="black">poetic.com</font></b></font></b>><br />
Reply-To: "<b><font color="black">Luke</font></b> <b><font color="black">Perry</font></b>" <<b><font color="black">brienne66@<b><font color="black">poetic.com</font></b></font></b>><br />
Index: TestMailParse010.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse010.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse010.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse010.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
***************
*** 1,30 ****
! X-Auth-No:
! Return-Path: <cs...@my...>
! Received: from digitalme.com not authenticated [193.97.97.75]
! by smtp-send.myrealbox.com with NetMail SMTP Agent $Revision$ on Novell NetWare;
! Sat, 11 Jan 2003 22:03:10 -0700
! Received: from 65.100.247.142 not authenticated [65.100.247.142]
! by digitalme.com with Novell NIMS $Revision$ on Novell NetWare;
! Sat, 11 Jan 2003 21:03:05 -0800
! Message-ID: <44747.22535@>
! From: U'S'A L'o't't'e'r'y L'T'D <iplvwq@>
Subject: [spam] Re[4]: Regarding your activation
! To: ar...@my...
! Date: Sun, 12 Jan 2003 07:03:25
! MIME-Version: 1.0
! Content-Type: text/plain;
! charset="windows-1252"
! Content-Transfer-Encoding: base64
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Q29uZ3JhdHVsYXRpb25zIQ0KDQpZb3UgaGF2ZSBiZWVuIHNlbGVjdGVkIHRvIHJlY2lldmUgbWVt
! YmVyc2hpcCB0byB0aGUgYmVzdCBBYERgVWBMYFQgc2l0ZSBvZg0KeW91ciBjaG9pY2UgZm9yIEZe
! Ul5FXkUhDQoNCiogV2UgaGF2ZSBldmVyeSBuaWNoZSB5b3UgY2FuIGltYWdpbmUhDQoqIEFsbCBv
! ZiBvdXIgc2l0ZXMgaGFzIGhpZ2ggcXVhbGl0eSBjb250YW50IQ0KKiBZb3UgY2FuIGJlIGluIG1l
! bWJlciBzZWN0aW9uIGluIGxlc3MgdGhhbiAxIG1pbiENCiogQW5kIGJlc3Qgb2YgYWxsOiBOYE9g
! IENgUmBFYERgSWBUYCBDYEFgUmBEYCBOYEVgRWBEYEVgRGANCg0KKiAqICogKiBDYExgSWBDYEsg
! SGBFYFJgRSAqICogKiAqDQpodHRwOi8vYmVzdHBvcm5ob3N0LmNvbS92YXN5YS8/aW5zdGFudGhv
! bWUNCg0KSWYgeW91IGhhdmUgcmVjZWl2ZWQgdGhpcyBtYWlsaW5nIGluIGVycm9yLCBvciBkbyBu
! b3Qgd2lzaCB0byByZWNlaXZlIGFueQ0KZnVydGhlciBvZmZlcnMgb3Igbm90aWNlcyBmcm9tIHVz
! LCBzaW1wbHkNCmdvIGhlcmU6IHJtX21lX25vd0BtYWlsLmNvbT9zdWJqZWN0PURy
--- 1,30 ----
! X-Auth-No:
! Return-Path: <cs...@my...>
! Received: from digitalme.com not authenticated [193.97.97.75]
! by smtp-send.myrealbox.com with NetMail SMTP Agent $Revision$ on Novell NetWare;
! Sat, 11 Jan 2003 22:03:10 -0700
! Received: from 65.100.247.142 not authenticated [65.100.247.142]
! by digitalme.com with Novell NIMS $Revision$ on Novell NetWare;
! Sat, 11 Jan 2003 21:03:05 -0800
! Message-ID: <44747.22535@>
! From: U'S'A L'o't't'e'r'y L'T'D <iplvwq@>
Subject: [spam] Re[4]: Regarding your activation
! To: ar...@my...
! Date: Sun, 12 Jan 2003 07:03:25
! MIME-Version: 1.0
! Content-Type: text/plain;
! charset="windows-1252"
! Content-Transfer-Encoding: base64
X-Text-Classification: spam
X-POPFile-Link: <http://127.0.0.1:8080/jump_to_message?view=popfile0=0.msg>
! Q29uZ3JhdHVsYXRpb25zIQ0KDQpZb3UgaGF2ZSBiZWVuIHNlbGVjdGVkIHRvIHJlY2lldmUgbWVt
! YmVyc2hpcCB0byB0aGUgYmVzdCBBYERgVWBMYFQgc2l0ZSBvZg0KeW91ciBjaG9pY2UgZm9yIEZe
! Ul5FXkUhDQoNCiogV2UgaGF2ZSBldmVyeSBuaWNoZSB5b3UgY2FuIGltYWdpbmUhDQoqIEFsbCBv
! ZiBvdXIgc2l0ZXMgaGFzIGhpZ2ggcXVhbGl0eSBjb250YW50IQ0KKiBZb3UgY2FuIGJlIGluIG1l
! bWJlciBzZWN0aW9uIGluIGxlc3MgdGhhbiAxIG1pbiENCiogQW5kIGJlc3Qgb2YgYWxsOiBOYE9g
! IENgUmBFYERgSWBUYCBDYEFgUmBEYCBOYEVgRWBEYEVgRGANCg0KKiAqICogKiBDYExgSWBDYEsg
! SGBFYFJgRSAqICogKiAqDQpodHRwOi8vYmVzdHBvcm5ob3N0LmNvbS92YXN5YS8/aW5zdGFudGhv
! bWUNCg0KSWYgeW91IGhhdmUgcmVjZWl2ZWQgdGhpcyBtYWlsaW5nIGluIGVycm9yLCBvciBkbyBu
! b3Qgd2lzaCB0byByZWNlaXZlIGFueQ0KZnVydGhlciBvZmZlcnMgb3Igbm90aWNlcyBmcm9tIHVz
! LCBzaW1wbHkNCmdvIGhlcmU6IHJtX21lX25vd0BtYWlsLmNvbT9zdWJqZWN0PURy
Index: TestMailParse010.col
===================================================================
RCS file: /cvsroot/popfile/engine/tests/Attic/TestMailParse010.col,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** TestMailParse010.col 20 Mar 2003 11:12:55 -0000 1.1.4.1
--- TestMailParse010.col 11 Apr 2003 00:38:06 -0000 1.1.4.2
***************
*** 2,9 ****
Return-Path: <<b><font color="black">cs...@my...</font></b>><br />
Received: from <b><font color="black">digitalme.com</font></b> not <b><font color="black">authenticated</font></b> [<b><font color="black">193.97.97.75</font></b>]<br />
! by smtp-send.myrealbox.com with NetMail SMTP Agent $Revision$ on Novell NetWare;<br />
Sat, 11 Jan 2003 22:03:10 -0700<br />
Received: from <b><font color="black"><b><font color="black">65.100.247.142</font></b></font></b> not <b><font color="black">authenticated</font></b> [65.100.247.142]<br />
! by digitalme.com with Novell NIMS $Revision$ on Novell NetWare;<br />
Sat, 11 Jan 2003 21:03:05 -0800<br />
Message-ID: <44747.22535@><br />
--- 2,9 ----
Return-Path: <<b><font color="black">cs...@my...</font></b>><br />
Received: from <b><font color="black">digitalme.com</font></b> not <b><font color="black">authenticated</font></b> [<b><font color="black">193.97.97.75</font></b>]<br />
! by <b><font color="black">smtp-send.<b><font color="black">myrealbox.com</font></b></font></b> with <b><font color="black">NetMail</font></b> SMTP <b><font color="black">Agent</font></b> $<b><font color="black">Revision</font></b>: 1.1 $ on <b><font color="black">Novell</font></b> <b><font color="black">NetWare</font></b>;<br />
Sat, 11 Jan 2003 22:03:10 -0700<br />
Received: from <b><font color="black"><b><font color="black">65.100.247.142</font></b></font></b> not <b><font color="black">authenticated</font></b> [65.100.247.142]<br />
! by <b><font color="black">digitalme.com</font></b> with <b><font color="black">Novell</font></b> <b><font color="black">NIMS</font></b> $<b><font color="black">Revision</font></b>: 1.1 $ on <b><font color="black">Novell</font></b> <b><font color="black">NetWare</font></b>;<br />
Sat, 11 Jan 2003 21:03:05 -0800<br />
Message-ID: <44747.22535@><br />
***************
*** 14,18 ****
MIME-Version: 1.0<br />
Content-Type: text/plain;<br />
! charset="windows-1252"<br />
Content-Transfer-Encoding: base64<br />
<br />
--- 14,18 ----
MIME-Version: 1.0<br />
Content-Type: text/plain;<br />
! charset="<b><font color="black">windows-1252</font></b>"<br />
Content-Transfer-Encoding: base64<br />
<br />
Index: TestMailParse011.cam
===================================================================
RCS file: /cvsroot/popfile/engine/tests/TestMailParse011.cam,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** TestMailParse011.cam 28 Feb 2003 01:57:23 -0000 1.2
--- TestMailParse011.cam 11 Apr 2003 00:38:06 -0000 1.2.2.1
...
[truncated message content] |
|
From: <ssc...@us...> - 2003-04-11 00:34:00
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv28186
Modified Files:
Tag: v0/18/1
MailParse.pm
Log Message:
Fixes:
695565 Continued cc lines not colorized
702215 POPFile 0.18.1 fails to decode base64 attachments.
702316 0.18.1: nested QP parts - not decoded
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.91.2.2
retrieving revision 1.91.2.3
diff -C2 -d -r1.91.2.2 -r1.91.2.3
*** MailParse.pm 10 Apr 2003 22:11:23 -0000 1.91.2.2
--- MailParse.pm 11 Apr 2003 00:33:57 -0000 1.91.2.3
***************
*** 740,743 ****
--- 740,748 ----
my $encoding = '';
+ # Variables to save header information to while parsing headers
+
+ my $header;
+ my $argument;
+
# Clear the word hash
***************
*** 789,967 ****
print ">>> $line" if $self->{debug};
! if ( $self->{color} ) {
! my $splitline = $line;
! $splitline =~ s/([^\r\n]{100,120} )/$1\r\n/g;
! $splitline =~ s/([^ \r\n]{120})/$1\r\n/g;
! if ( !$self->{in_html_tag} ) {
! $colorized .= $self->{ut} if ( $self->{ut} ne '' );
!
$self->{ut} = '';
! }
!
! #Escape some HTML characters to ensure display in HTML UI
! $splitline =~ s/</&lt;/g;
! $splitline =~ s/>/&gt;/g;
!
! if ( $encoding =~ /quoted\-printable/i ) {
! $splitline =~ s/=3C/&lt;/g;
! $splitline =~ s/=3E/&gt;/g;
! }
! $splitline =~ s/\t/ /g;
! $self->{ut} .= $splitline;
! }
- if ($self->{in_headers}) {
-
# Check for blank line signifying end of headers
-
- if ( $line =~ /^(\r\n|\r|\n)/) {
- $self->{in_headers} = 0;
- print "Header parsing complete.\n" if $self->{debug};
- }
-
- # If we have an email header then just keep the part after the :
-
- if ( $line =~ /^([A-Za-z-]+):[ \t]*([^\n\r]*)/ ) {
- my $header = $1;
- my $argument = $2;
-
- print "Header ($header) ($argument)\n" if ($self->{debug});
-
- # Handle the From, To and Cc headers and extract email addresses
- # from them and treat them as words
! # For certain headers we are going to mark them specially in the corpus
! # by tagging them with where they were found to help the classifier
! # do a better job. So if you have
! #
! # From: fo...@ba...
! #
! # then we'll add from:fo...@ba... to the corpus and not just fo...@ba...
! my $prefix = '';
!
! if ( $header =~ /^(From|To|Cc|Reply\-To)/i ) {
!
! # Concatenate multi-line fields (To, CC)
!
! if ( ( $header =~ /^To/i ) || ( $header =~ /^Cc/i ) ) {
! my $currpos = tell MSG;
! my $tempread = <MSG>;
! while ( $tempread =~ s/^[ \t]+(.*?)[\r\n]+// ) {
! if ( $1 ne '' ) {
! $argument .= $1;
! $currpos = tell MSG;
! $tempread = <MSG>;
! } else {
! last;
! }
! }
! seek MSG, $currpos, 0;
! print "\n$header: [[$argument]]\n" if $self->{debug};
! }
! if ( $argument =~ /=\?(.{1,40})\?/ ) {
! update_word( $self, $1, 0, '', '', 'charset' );
! }
!
! if ( $header =~ /^From/i ) {
! $encoding = '';
! $self->{content_type} = '';
! $self->{from} = $self->decode_string( $argument ) if ( $self->{from} eq '' ) ;
! $prefix = 'from';
! }
! $prefix = 'to' if ( $header =~ /^To/i );
! $self->{to} = $self->decode_string( $argument ) if ( ( $header =~ /^To/i ) && ( $self->{to} eq '' ) );
!
! $prefix = 'cc' if ( $header =~ /^Cc/i );
! $self->{cc} = $self->decode_string( $argument ) if ( ( $header =~ /^Cc/i ) && ( $self->{cc} eq '' ) );
!
! while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
! update_word($self, $1, 0, ';', '&',$prefix);
! add_url($self, $2, 0, '@', '[&<]',$prefix);
! }
- while ( $argument =~ s/([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+))// ) {
- update_word($self, $1, 0, '', '',$prefix);
- add_url($self, $2, 0, '@', '',$prefix);
- }
-
- add_line( $self, $argument, 0, $prefix );
- next;
- }
-
- $self->{subject} = $self->decode_string( $argument ) if ( ( $header =~ /^Subject/i ) && ( $self->{subject} eq '' ) );
! if ( $header =~ /^Subject/i ) {
! $prefix = 'subject';
! }
! $self->{date} = $argument if ( $header =~ /^Date/i );
! # Look for MIME
! if ( $header =~ /^Content-Type/i ) {
! if ( $argument =~ /multipart\//i ) {
! my $boundary = $argument;
!
! if ( !( $argument =~ /boundary=(\"([A-Z0-9\'\(\)\+\_\,\-\.\/\:\=\?][A-Z0-9\'\(\)\+_,\-\.\/:=\? ]{0,69})\"|([^\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]{1,70}))/i )) {
! $boundary = <MSG>;
! }
!
! if ( $boundary =~ /boundary=(\"([A-Z0-9\'\(\)\+\_\,\-\.\/\:\=\?][A-Z0-9\'\(\)\+_,\-\.\/:=\? ]{0,69})\"|([^\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]{1,70}))/i ) {
!
! $boundary = ($2 || $3);
!
! $boundary =~ s/(\+|\/|\?|\*|\||\(|\)|\[|\]|\{|\}|\^|\$|\.)/\\$1/g;
!
! if ($mime ne '') {
!
! # Fortunately the pipe character isn't a valid mime boundary character!
!
! $mime = join('|', $mime, $boundary);
! } else {
! $mime = $boundary;
! }
! print "Set mime boundary to " . $mime . "\n" if $self->{debug};
! next;
! }
! }
! if ( $argument =~ /charset=\"?([^\"]{1,40})\"?/ ) {
! update_word( $self, $1, 0, '' , '', 'charset' );
! }
!
! if ( $argument =~ /^(.*?)(;$)/ ) {
! print "Set content type to $1\n" if $self->{debug};
! $self->{content_type} = $1;
! }
! next;
! }
!
! # Look for the different encodings in a MIME document, when we hit base64 we will
! # do a special parse here since words might be broken across the boundaries
!
! if ( $header =~ /^Content-Transfer-Encoding/i ) {
! $encoding = $argument;
! print "Setting encoding to $encoding\n" if $self->{debug};
! my $compact_encoding = $encoding;
! $compact_encoding =~ s/[^A-Za-z0-9]//g;
! increment_word( $self, "encoding:$compact_encoding" );
! next;
! }
!
! # Some headers to discard
!
! next if ( $header =~ /^(Thread-Index|X-UIDL|Message-ID|X-Text-Classification|X-Mime-Key)/i );
!
! add_line( $self, $argument, 0, $prefix );
!
next;
}
}
--- 794,854 ----
print ">>> $line" if $self->{debug};
! if ($self->{color}) {
! if (!$self->{in_html_tag}) {
! $colorized .= $self->{ut};
$self->{ut} = '';
! }
!
! $self->{ut} .= splitline($line, $encoding);
! }
! if ($self->{in_headers}) {
+ # temporary colorization while in headers is handled within parse_header
! $self->{ut} = '';
# Check for blank line signifying end of headers
! if ( $line =~ /^(\r\n|\r|\n)/) {
!
! # Parse the last header
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding);
! # Clear the saved headers
! $header = '';
! $argument = '';
!
! $self->{ut} .= splitline("\015\012", 0);
! $self->{in_headers} = 0;
! print "Header parsing complete.\n" if $self->{debug};
! next;
! }
! # If we have an email header then just keep the part after the :
! if ( $line =~ /^([A-Za-z-]+):[ \t]*([^\n\r]*)/ ) {
! # Parse the last header
! ($mime,$encoding) = $self->parse_header($header,$argument,$mime,$encoding) if ($header ne '');
! # Save the new information for the current header
!
! $header = $1;
! $argument = $2;
next;
}
+
+ # Append to argument if the next line begins with whitespace (isn't a new header)
+
+ if ( $line =~ /^([\t ].*?)(\r\n|\r|\n)/ ) {
+ $argument .= "\015\012" . $1;
+ }
+ next;
}
***************
*** 1074,1077 ****
--- 961,1117 ----
# ---------------------------------------------------------------------------------------------
#
+ # parse_header - Performs parsing operations on a message header
+ #
+ # $header Name of header being processed
+ # $argument Value of header being processed
+ # $mime The presently saved mime boundaries list
+ # $encoding Current message encoding
+ #
+ # ---------------------------------------------------------------------------------------------
+ sub parse_header
+ {
+ my ($self, $header, $argument, $mime, $encoding) = @_;
+
+ print "Header ($header) ($argument)\n" if ($self->{debug});
+
+ # Remove over-reading
+ $self->{ut} = '';
+
+ # Queue just this header for colorization
+ $self->{ut} = splitline("$header: $argument\015\012", $encoding);
+
+ # Check the encoding type in all RFC 2047 encoded headers
+
+ if ( $argument =~ /=\?(.{1,40})\?(Q|B)/i ) {
+ update_word( $self, $1, 0, '', '', 'charset' );
+ }
+
+ # Handle the From, To and Cc headers and extract email addresses
+ # from them and treat them as words
+
+
+ # For certain headers we are going to mark them specially in the corpus
+ # by tagging them with where they were found to help the classifier
+ # do a better job. So if you have
+ #
+ # From: fo...@ba...
+ #
+ # then we'll add from:fo...@ba... to the corpus and not just fo...@ba...
+
+ my $prefix = '';
+
+ if ( $header =~ /^(From|To|Cc|Reply\-To)$/i ) {
+
+ # These headers at least can be decoded
+
+ $argument = $self->decode_string( $argument );
+
+ if ( $argument =~ /=\?(.{1,40})\?/ ) {
+ update_word( $self, $1, 0, '', '', 'charset' );
+ }
+
+ if ( $header =~ /^From$/i ) {
+ $encoding = '';
+ $self->{content_type} = '';
+ $self->{from} = $argument if ( $self->{from} eq '' ) ;
+ $prefix = 'from';
+ }
+
+ if ( $header =~ /^To$/i ) {
+ $prefix = 'to';
+ $self->{to} = $argument if ( $self->{to} eq '' );
+ }
+
+ if ( $header =~ /^Cc$/i ) {
+ $prefix = 'cc';
+ $self->{cc} = $argument if ( $self->{cc} eq '' );
+ }
+
+ while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
+ update_word($self, $1, 0, ';', '&',$prefix);
+ add_url($self, $2, 0, '@', '[&<]',$prefix);
+ }
+
+ while ( $argument =~ s/([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+))// ) {
+ update_word($self, $1, 0, '', '',$prefix);
+ add_url($self, $2, 0, '@', '',$prefix);
+ }
+
+ add_line( $self, $argument, 0, $prefix );
+ return ($mime, $encoding);
+ }
+
+ if ( $header =~ /^Subject$/i ) {
+ $prefix = 'subject';
+ $argument = $self->decode_string( $argument );
+ $self->{subject} = $argument if ( ( $self->{subject} eq '' ) );
+ }
+
+ $self->{date} = $argument if ( $header =~ /^Date/i );
+
+ # Look for MIME
+
+ if ( $header =~ /^Content-Type$/i ) {
+
+ if ( $argument =~ /charset=\"?([^\"]{1,40})\"?/ ) {
+ update_word( $self, $1, 0, '' , '', 'charset' );
+ }
+
+ if ( $argument =~ /^(.*?)(;)/ ) {
+ print "Set content type to $1\n" if $self->{debug};
+ $self->{content_type} = $1;
+ }
+
+ if ( $argument =~ /multipart\//i ) {
+ my $boundary = $argument;
+
+ if ( $boundary =~ /boundary= ?(\"([A-Z0-9\'\(\)\+\_\,\-\.\/\:\=\?][A-Z0-9\'\(\)\+_,\-\.\/:=\? ]{0,69})\"|([^\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]{1,70}))/i ) {
+
+ $boundary = ($2 || $3);
+
+ $boundary =~ s/(.*)/\Q$1\E/g;
+
+ if ($mime ne '') {
+
+ # Fortunately the pipe character isn't a valid mime boundary character!
+
+ $mime = join('|', $mime, $boundary);
+ } else {
+ $mime = $boundary;
+ }
+ print "Set mime boundary to " . $mime . "\n" if $self->{debug};
+ return ($mime, $encoding);
+ }
+ }
+ return ($mime, $encoding);
+ }
+
+ # Look for the different encodings in a MIME document, when we hit base64 we will
+ # do a special parse here since words might be broken across the boundaries
+
+ if ( $header =~ /^Content-Transfer-Encoding$/i ) {
+ $encoding = $argument;
+ print "Setting encoding to $encoding\n" if $self->{debug};
+ my $compact_encoding = $encoding;
+ $compact_encoding =~ s/[^A-Za-z0-9]//g;
+ increment_word( $self, "encoding:$compact_encoding" );
+ return ($mime, $encoding);
+ }
+
+ # Some headers to discard
+
+ return ($mime, $encoding) if ( $header =~ /^(Thread-Index|X-UIDL|Message-ID|X-Text-Classification|X-Mime-Key)$/i );
+
+ # Some headers should never be RFC 2047 decoded
+
+ $argument = $self->decode_string($argument) unless ($header =~ /^(Revceived|Content\-Type|Content\-Disposition)$/i);
+
+ add_line( $self, $argument, 0, $prefix );
+
+ return ($mime, $encoding);
+ }
+
+ # ---------------------------------------------------------------------------------------------
+ #
# clear_out_base64
#
***************
*** 1149,1152 ****
--- 1189,1219 ----
}
+ # ---------------------------------------------------------------------------------------------
+ #
+ # splitline - Escapes characters so a line will print as plain-text within a HTML document.
+ #
+ # $line The line to escape
+ # $encoding The value of any current encoding scheme
+ #
+ # ---------------------------------------------------------------------------------------------
+
+ sub splitline
+ {
+ my ($line, $encoding) = @_;
+ $line =~ s/([^\r\n]{100,120} )/$1\r\n/g;
+ $line =~ s/([^ \r\n]{120})/$1\r\n/g;
+
+ $line =~ s/</&lt;/g;
+ $line =~ s/>/&gt;/g;
+
+ if ( $encoding =~ /quoted\-printable/i ) {
+ $line =~ s/=3C/&lt;/g;
+ $line =~ s/=3E/&gt;/g;
+ }
+
+ $line =~ s/\t/ /g;
+
+ return $line;
+ }
1;
|
|
From: <ssc...@us...> - 2003-04-10 23:19:56
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv15769
Modified Files:
HTML.pm
Log Message:
Hopefully fix bugs, and some bleeding edge issues:
[ 708387 ] unrequested History deletion
[ 703364 ] 0.18.1: history doesn't go back far enough
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.123
retrieving revision 1.124
diff -C2 -d -r1.123 -r1.124
*** HTML.pm 8 Apr 2003 00:51:47 -0000 1.123
--- HTML.pm 10 Apr 2003 23:19:53 -0000 1.124
***************
*** 172,178 ****
change_session_key($self);
- $self->remove_mail_files();
- $self->calculate_today();
-
# The parent needs a reference to the url handler function
$self->{url_handler_} = \&url_handler__;
--- 172,175 ----
***************
*** 191,194 ****
--- 188,196 ----
{
my ( $self ) = @_;
+
+ # This needs to occur at launch, but after initialization
+
+ $self->remove_mail_files();
+ $self->calculate_today();
# Ensure that the messages subdirectory exists
|
|
From: <ssc...@us...> - 2003-04-10 23:15:55
|
Update of /cvsroot/popfile/engine/UI
In directory sc8-pr-cvs1:/tmp/cvs-serv13475
Modified Files:
Tag: v0/18/1
HTML.pm
Log Message:
Hopefully fix bugs, and some bleeding edge issues:
[ 708387 ] unrequested History deletion
[ 703364 ] 0.18.1: history doesn't go back far enough
Index: HTML.pm
===================================================================
RCS file: /cvsroot/popfile/engine/UI/HTML.pm,v
retrieving revision 1.98.2.4
retrieving revision 1.98.2.5
diff -C2 -d -r1.98.2.4 -r1.98.2.5
*** HTML.pm 27 Mar 2003 04:12:09 -0000 1.98.2.4
--- HTML.pm 10 Apr 2003 23:15:46 -0000 1.98.2.5
***************
*** 171,177 ****
change_session_key($self);
- $self->remove_mail_files();
- $self->calculate_today();
-
return 1;
}
--- 171,174 ----
***************
*** 187,190 ****
--- 184,192 ----
{
my ( $self ) = @_;
+
+ # This needs to happen after the configuration is loaded to avoid using only defaults
+
+ $self->remove_mail_files();
+ $self->calculate_today();
# Load the current configuration from disk and then load up the
|
|
From: <ssc...@us...> - 2003-04-10 22:22:50
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv12482
Modified Files:
MailParse.pm
Log Message:
restore $self->{color__} logic
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.108
retrieving revision 1.109
diff -C2 -d -r1.108 -r1.109
*** MailParse.pm 10 Apr 2003 22:20:33 -0000 1.108
--- MailParse.pm 10 Apr 2003 22:22:47 -0000 1.109
***************
*** 830,840 ****
print ">>> $line" if $self->{debug};
! if (!$self->{in_html_tag__}) {
! $colorized .= $self->{ut__};
! $self->{ut__} = '';
! }
!
! $self->{ut__} .= splitline($line, $encoding);
if ($self->{in_headers__}) {
--- 830,843 ----
print ">>> $line" if $self->{debug};
+
+ if ($self->{color__}) {
! if (!$self->{in_html_tag__}) {
! $colorized .= $self->{ut__};
! $self->{ut__} = '';
! }
!
! $self->{ut__} .= splitline($line, $encoding);
! }
if ($self->{in_headers__}) {
|
|
From: <ssc...@us...> - 2003-04-10 22:20:39
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv10843
Modified Files:
MailParse.pm
Log Message:
restore proper colorization of multi-line HTML tags
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.107
retrieving revision 1.108
diff -C2 -d -r1.107 -r1.108
*** MailParse.pm 10 Apr 2003 21:37:25 -0000 1.107
--- MailParse.pm 10 Apr 2003 22:20:33 -0000 1.108
***************
*** 830,838 ****
print ">>> $line" if $self->{debug};
!
! $colorized .= $self->{ut__};
! $self->{ut__} = '';
!
! $self->{ut__} .= splitline($line, $encoding);
if ($self->{in_headers__}) {
--- 830,840 ----
print ">>> $line" if $self->{debug};
!
! if (!$self->{in_html_tag__}) {
! $colorized .= $self->{ut__};
! $self->{ut__} = '';
! }
!
! $self->{ut__} .= splitline($line, $encoding);
if ($self->{in_headers__}) {
|
|
From: <ssc...@us...> - 2003-04-10 22:11:27
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv3846
Modified Files:
Tag: v0/18/1
MailParse.pm
Log Message:
fix bug with protocol-less hrefs
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.91.2.1
retrieving revision 1.91.2.2
diff -C2 -d -r1.91.2.1 -r1.91.2.2
*** MailParse.pm 11 Mar 2003 08:08:00 -0000 1.91.2.1
--- MailParse.pm 10 Apr 2003 22:11:23 -0000 1.91.2.2
***************
*** 379,399 ****
# Tags with href attributes
!
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # ftp, http, https
! if ( $value =~ /^(ftp|http|https):\/\//i ) {
! add_url($self, $value, $encoded, $quote, $end_quote, '');
! next;
}
-
- # The less common mailto: goes second, and we only care if this is in an anchor
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
! }
! next;
}
--- 379,399 ----
# Tags with href attributes
!
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # Look for mailto:'s
! if ($value =~ /^mailto:/i) {
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
! }
! } else {
! # Anything that isn't a mailto is probably an URL
!
! $self->add_url($value, $encoded, $quote, $end_quote, '');
}
! next;
}
|
|
From: <ssc...@us...> - 2003-04-10 21:37:31
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv11134
Modified Files:
MailParse.pm
Log Message:
rework bug 696986 fix, prior fix caused UTF errors in Bayes.pm
modify href parsing to detect protocol-less URLs
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.106
retrieving revision 1.107
diff -C2 -d -r1.106 -r1.107
*** MailParse.pm 8 Apr 2003 03:16:44 -0000 1.106
--- MailParse.pm 10 Apr 2003 21:37:25 -0000 1.107
***************
*** 15,19 ****
use MIME::Base64;
use MIME::QuotedPrint;
! require Encode::MIME::Header;
# HTML entity mapping to character codes, this maps things like & to their corresponding
--- 15,19 ----
use MIME::Base64;
use MIME::QuotedPrint;
! #require Encode::MIME::Header;
# HTML entity mapping to character codes, this maps things like & to their corresponding
***************
*** 405,422 ****
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # ftp, http, https
!
! if ( $value =~ /^(ftp|http|https):\/\//i ) {
! add_url($self, $value, $encoded, $quote, $end_quote, '');
! next;
! }
!
! # The less common mailto: goes second, and we only care if this is in an anchor
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
}
! next;
}
--- 405,422 ----
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # Look for mailto:'s
! if ($value =~ /^mailto:/i) {
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
! }
! } else {
! # Anything that isn't a mailto is probably an URL
!
! $self->add_url($value, $encoded, $quote, $end_quote, '');
}
!
! next;
}
***************
*** 657,660 ****
--- 657,661 ----
# $protocol $authinfo $host $port $query $hash may be processed below if desired
+ return 1;
}
***************
*** 1053,1073 ****
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
!
! $mystring = Encode::MIME::Header::decode($Encode::Encoding{'MIME-Header'},$mystring);
! #my $decode_it = '';
! #while ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/ig ) {
! # $decode_it = decode_base64( $1 );
! # $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! #}
! #while ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/ig ) {
! # $decode_it = $1;
! # $decode_it =~ s/\_/=20/g;
! # $decode_it = decode_qp( $decode_it );
! # $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! # }
!
return $mystring;
}
--- 1054,1073 ----
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
! my $decode_it = '';
! while ( $mystring =~ /=\?[\w-]+\?(B|Q)\?(.*)\?=/ig ) {
! if ($1 eq "B") {
! $decode_it = decode_base64( $2 );
! $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! } elsif ($1 eq "Q") {
! $decode_it = $2;
! $decode_it =~ s/\_/=20/g;
! $decode_it = decode_qp( $decode_it );
! $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! }
! }
!
return $mystring;
}
|
|
From: <ssc...@us...> - 2003-04-08 04:23:22
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv7267
Modified Files:
Tag: v0/18/1
Bayes.pm
Log Message:
fixes bug 701981, removes lines in headers consisting of only whitespace (eudora has trouble with them)
Thanks to esniper for the patch
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.107.2.1
retrieving revision 1.107.2.2
diff -C2 -d -r1.107.2.1 -r1.107.2.2
*** Bayes.pm 7 Apr 2003 20:20:15 -0000 1.107.2.1
--- Bayes.pm 8 Apr 2003 04:23:19 -0000 1.107.2.2
***************
*** 809,812 ****
--- 809,817 ----
if ( $getting_headers ) {
+
+ # Kill header lines containing only whitespace (Exim does this)
+
+ next if ( $line =~ /^[ \t]+(\r\n|\r|\n)$/i );
+
if ( !( $line =~ /^(\r\n|\r|\n)$/i ) ) {
$message_size += length $line;
|