|
From: Oliver M. B. <ol...@fa...> - 2005-01-02 10:52:32
|
Hi!
My site uses a language (Japanese) that is not supported by Lingua::Stem,
and even though I've switched off stemming, Slash::Utility::Data::findWords()
still chokes trying to initialize Lingua::Stem.
Below is a patch that makes the initialization of the stemmer and all
operations on it depend on the stem_uncommon_words var. Please review and
apply.
diff -u -r1.6 Data.pm
--- ./Slash/Utility/Data/Data.pm 31 Dec 2004 12:35:48 -0000 1.6
+++ ./Slash/Utility/Data/Data.pm 2 Jan 2005 10:51:07 -0000
@@ -3243,11 +3243,14 @@
my $use_stemming = $constants->{stem_uncommon_words};
my $language = $constants->{rdflanguage} || "EN-US";
$language = uc($language);
- my $stemmer = Lingua::Stem->new(-locale => $language);
- $stemmer->stem_caching({ -level => 2 });
+ my $stemmer;
my $text_return_hr = {};
my @word_stems;
+ if ($use_stemming){
+ $stemmer = Lingua::Stem->new(-locale => $language);
+ $stemmer->stem_caching({ -level => 2 });
+ }
# Return a hashref; keys are the words, values are hashrefs
# with the number of times they appear and so on.
@@ -3348,7 +3351,7 @@
$wordcount->{$word}{count}++;
}
}
- $stemmer->clear_stem_cache();
+ $stemmer->clear_stem_cache() if $use_stemming;
return $wordcount;
}
--
Oliver M. Bolzer
GPG (PGP) Fingerprint = 621B 52F6 2AC1 36DB 8761 018F 8786 87AD EF50 D1FF
|