|
From: <ssc...@us...> - 2003-07-08 01:54:40
|
Update of /cvsroot/popfile/engine
In directory sc8-pr-cvs1:/tmp/cvs-serv12282
Modified Files:
traintest.pl
Log Message:
Update for the message queue (POPFile::MQ) changes to allow traintest.pl to run.
Index: traintest.pl
===================================================================
RCS file: /cvsroot/popfile/engine/traintest.pl,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** traintest.pl 29 May 2003 05:49:42 -0000 1.5
--- traintest.pl 8 Jul 2003 01:54:37 -0000 1.6
***************
*** 11,14 ****
--- 11,15 ----
use Classifier::Bayes;
use POPFile::Configuration;
+ use POPFile::MQ;
my $debug = 0;
***************
*** 64,68 ****
{
my ($config) = @_;
!
$config->parameter("csv",$DEFAULT_CSV);
$config->parameter("toe",$DEFAULT_TOE);
--- 65,69 ----
{
my ($config) = @_;
!
$config->parameter("csv",$DEFAULT_CSV);
$config->parameter("toe",$DEFAULT_TOE);
***************
*** 79,84 ****
sub cvs_out
{
! my ($config, $file, @log) = @_;
!
if ($file eq 'auto') {
if ($config->parameter('toe') == 1) {
--- 80,85 ----
sub cvs_out
{
! my ($config, $file, @log) = @_;
!
if ($file eq 'auto') {
if ($config->parameter('toe') == 1) {
***************
*** 90,113 ****
( $config->parameter('window2') ne $DEFAULT_WINDOW2 ) ) {
$file .= "$config->parameter('window1')and$config->parameter('window2')";
!
}
!
if ( $config->parameter('stopwords') ne $DEFAULT_STOP ) {
$file .= "_";
! if ($config->parameter('stopwords') != 1) {
$file .= "no";
}
! $file .= "stop";
}
!
!
$file .= ".csv";
}
!
print STDERR "Printing data to $file\n";
!
open CSV, ">$file";
!
!
$, = ",";
--- 91,114 ----
( $config->parameter('window2') ne $DEFAULT_WINDOW2 ) ) {
$file .= "$config->parameter('window1')and$config->parameter('window2')";
!
}
!
if ( $config->parameter('stopwords') ne $DEFAULT_STOP ) {
$file .= "_";
! if ($config->parameter('stopwords') != 1) {
$file .= "no";
}
! $file .= "stop";
}
!
!
$file .= ".csv";
}
!
print STDERR "Printing data to $file\n";
!
open CSV, ">$file";
!
!
$, = ",";
***************
*** 122,130 ****
print CSV "\n";
}
!
print STDERR "Data saved to $file\n";
!
$, = "";
!
}
--- 123,131 ----
print CSV "\n";
}
!
print STDERR "Data saved to $file\n";
!
$, = "";
!
}
***************
*** 151,155 ****
{
my ($toe,$predicted,$actual) = @_;
!
if ($toe == 1) {
return ($predicted ne $actual);
--- 152,156 ----
{
my ($toe,$predicted,$actual) = @_;
!
if ($toe == 1) {
return ($predicted ne $actual);
***************
*** 167,175 ****
my $dir = $self->{configuration__}->parameter('corpus_out');
mkdir($dir);
!
foreach my $abucket ( keys %{$self->{total__}} ) {
!
print "saving $abucket corpus.\n";
!
my $subdir = $dir;
$subdir .= "/$abucket";
--- 168,176 ----
my $dir = $self->{configuration__}->parameter('corpus_out');
mkdir($dir);
!
foreach my $abucket ( keys %{$self->{total__}} ) {
!
print "saving $abucket corpus.\n";
!
my $subdir = $dir;
$subdir .= "/$abucket";
***************
*** 185,190 ****
}
}
! }
! }
}
--- 186,191 ----
}
}
! }
! }
}
***************
*** 272,278 ****
--- 273,285 ----
my $b = new Classifier::Bayes;
my $c = new POPFile::Configuration;
+ my $mq = new POPFile::MQ;
$b->configuration( $c );
$c->configuration( $c );
+ $mq->configuration( $c);
+
+ $b->mq( $mq );
+ $c->mq( $mq );
+ $mq->mq( $mq );
$c->initialize();
***************
*** 281,296 ****
$c->load_configuration();
!
$c->parse_command_line();
!
# $b->{unclassified} = ($c->parameter('unclassified_probability') || 0.0001);
$b->{unclassified__} = log($c->parameter("bayes_unclassified_probability") || 0.5);
!
! # test with or without stop-words
if ( $c->parameter("stopwords") eq 0 ) {
$b->{parser__}->{mangle__}->{stop__} = {};
$b->{mangler__}->{stop__} = {};
}
!
my $archive = $c->parameter("html_archive_dir");
--- 288,303 ----
$c->load_configuration();
!
$c->parse_command_line();
!
# $b->{unclassified} = ($c->parameter('unclassified_probability') || 0.0001);
$b->{unclassified__} = log($c->parameter("bayes_unclassified_probability") || 0.5);
!
! # test with or without stop-words
if ( $c->parameter("stopwords") eq 0 ) {
$b->{parser__}->{mangle__}->{stop__} = {};
$b->{mangler__}->{stop__} = {};
}
!
my $archive = $c->parameter("html_archive_dir");
***************
*** 355,362 ****
my $message_count = $index + 1;
$index = $#sorted_messages - $index;
! print "\n$self->{messages}->{messages}{ @sorted_messages[$index] }{long}:" if ($debug);
$bucket_class = $b->classify_file( $self->{messages}->{messages}{ @sorted_messages[$index] }{long});
$bucket_true = $self->{messages}->{messages}{ @sorted_messages[$index] }{bucket};
!
$b->{parser__}->{subject__} =~ s/(\r\n|\n|\r)[ \t]+//gm;
--- 362,369 ----
my $message_count = $index + 1;
$index = $#sorted_messages - $index;
! print "\n$self->{messages}->{messages}{ @sorted_messages[$index] }{long}:" if ($debug);
$bucket_class = $b->classify_file( $self->{messages}->{messages}{ @sorted_messages[$index] }{long});
$bucket_true = $self->{messages}->{messages}{ @sorted_messages[$index] }{bucket};
!
$b->{parser__}->{subject__} =~ s/(\r\n|\n|\r)[ \t]+//gm;
***************
*** 425,434 ****
my $end_time = time;
!
if ($c->parameter("dump")) {
dump_corpus($b);
!
}
!
my $total_messages = $#sorted_messages + 1;
--- 432,441 ----
my $end_time = time;
!
if ($c->parameter("dump")) {
dump_corpus($b);
!
}
!
my $total_messages = $#sorted_messages + 1;
|