Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv21095/Classifier
Modified Files:
Bayes.pm
Log Message:
Add new get_bucket_word_prefixes API and use it to make the viewing of the contents of an individual bucket work; this completes the core features for v0.20.0.
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.193
retrieving revision 1.194
diff -C2 -d -r1.193 -r1.194
*** Bayes.pm 10 Sep 2003 18:31:39 -0000 1.193
--- Bayes.pm 10 Sep 2003 22:33:57 -0000 1.194
***************
*** 68,71 ****
--- 68,72 ----
# Matrix of buckets, words and the word counts
$self->{matrix__} = {};
+ $self->{db__} = {};
# Total number of words in all buckets
***************
*** 295,298 ****
--- 296,300 ----
untie %{$self->{matrix__}{$bucket}};
delete $self->{matrix__}{$bucket};
+ delete $self->{db__}{$bucket};
}
}
***************
*** 614,620 ****
# tied hash from it thus performing an automatic upgrade.
! tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash",
! -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE;
if ( !defined( $self->get_bucket_word_count( $bucket ) ) ) {
--- 616,622 ----
# tied hash from it thus performing an automatic upgrade.
! $self->{db__}{$bucket} = tie %{$self->{matrix__}{$bucket}}, "BerkeleyDB::Hash",
! -Filename => $self->config_( 'corpus' ) . "/$bucket/table.db",
! -Flags => DB_CREATE;
if ( !defined( $self->get_bucket_word_count( $bucket ) ) ) {
***************
*** 1435,1461 ****
# get_bucket_word_list
#
! # Returns a list of bucket entries, each entry corresponds to all the words with the
! # same leading character
#
# $bucket The name of the bucket for which the word count is desired
#
# ---------------------------------------------------------------------------------------------
sub get_bucket_word_list
{
! my ( $self, $bucket ) = @_;
!
! my @result;
! if ( $self->get_bucket_word_count( $bucket ) > 0 ) {
! # TODO my @entries = @{$self->{matrix__}{$bucket}};
! # TODO for my $i (0..$#entries) {
! # TODO if ( defined( $entries[$i] ) && ( $entries[$i] ne '' ) ) {
! # TODO push @result, ($entries[$i]);
! # TODO }
! # TODO }
! }
! return @result;
}
--- 1437,1468 ----
# get_bucket_word_list
#
! # Returns a list of words all with the same first character
#
# $bucket The name of the bucket for which the word count is desired
+ # $prefix The first character of the words
#
# ---------------------------------------------------------------------------------------------
sub get_bucket_word_list
{
! my ( $self, $bucket, $prefix ) = @_;
! return grep {/^$prefix/} keys %{$self->{matrix__}{$bucket}};
! }
! # ---------------------------------------------------------------------------------------------
! #
! # get_bucket_word_prefixes
! #
! # Returns a list of all the initial letters of words in a bucket
! #
! # $bucket The name of the bucket for which the word count is desired
! #
! # ---------------------------------------------------------------------------------------------
! sub get_bucket_word_prefixes
! {
! my ( $self, $bucket ) = @_;
! my $prev = '';
! return grep {$_ ne $prev && ($prev = $_, 1)} sort map {substr($_,0,1)} keys %{$self->{matrix__}{$bucket}};
}
|