From: <tho...@us...> - 2008-01-31 18:12:38
|
Revision: 2607 http://clucene.svn.sourceforge.net/clucene/?rev=2607&view=rev Author: thomas_busch Date: 2008-01-31 10:12:34 -0800 (Thu, 31 Jan 2008) Log Message: ----------- first version Added Paths: ----------- trunk/bindings/perl/contributions/analysis/ trunk/bindings/perl/contributions/analysis/cjk/ trunk/bindings/perl/contributions/analysis/cjk/Changes trunk/bindings/perl/contributions/analysis/cjk/LuceneAnalysisCJK.xs trunk/bindings/perl/contributions/analysis/cjk/MANIFEST trunk/bindings/perl/contributions/analysis/cjk/META.yml trunk/bindings/perl/contributions/analysis/cjk/Makefile.PL trunk/bindings/perl/contributions/analysis/cjk/README trunk/bindings/perl/contributions/analysis/cjk/cpp/ trunk/bindings/perl/contributions/analysis/cjk/cpp/utils.cpp trunk/bindings/perl/contributions/analysis/cjk/lib/ trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/ trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/Analysis/ trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/Analysis/CJK.pm trunk/bindings/perl/contributions/analysis/cjk/t/ trunk/bindings/perl/contributions/analysis/cjk/t/00-load.t trunk/bindings/perl/contributions/analysis/cjk/typemap trunk/bindings/perl/contributions/analysis/cjk/xs/ trunk/bindings/perl/contributions/analysis/cjk/xs/CJKAnalyzer.xs trunk/bindings/perl/contributions/analysis/cjk/xs/CJKTokenizer.xs Added: trunk/bindings/perl/contributions/analysis/cjk/Changes =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/Changes (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/Changes 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,28 @@ +Revision history for Lucene + +0.01 2006-04-25 + First version + +0.02 2006-09-30 + Added sort. + Exception handling. + Added close() method for FSDirectory. + Close FSDirectory and RAMDirectory at destruction. + +0.03 2006-11-14 + Added MultiFieldQueryParser. + Exception handling for IndexReader. 
+ Added ability to set Similarity. + +0.04 2006-12-08 + Added UTF-8 support. + Added QueryFilter. + Exception handling for IndexWriter and QueryParser. + Enabled global loading of Lucene shared object. + +0.05 2006-12-18 + Resolved bug when indexing float or integer numbers in field values. + +0.06 2006-12-21 + Exception handling for MultiFieldQueryParser. + Added: trunk/bindings/perl/contributions/analysis/cjk/LuceneAnalysisCJK.xs =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/LuceneAnalysisCJK.xs (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/LuceneAnalysisCJK.xs 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,29 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#ifdef __cplusplus +} +#endif + + +#include "CLucene.h" +#include "CLucene/analysis/cjk/CJKAnalyzer.h" +#include "CLucene/util/Reader.h" + +#include "cpp/utils.cpp" + +typedef lucene::analysis::cjk::CJKAnalyzer CJKAnalyzer; +typedef lucene::analysis::cjk::CJKTokenizer CJKTokenizer; +typedef lucene::util::Reader Reader; + +MODULE = Lucene::Analysis::CJK PACKAGE = Lucene::Analysis::CJK::CJKAnalyzer +INCLUDE: xs/CJKAnalyzer.xs + +MODULE = Lucene::Analysis::CJK PACKAGE = Lucene::Analysis::CJK::CJKTokenizer +INCLUDE: xs/CJKTokenizer.xs + + + Added: trunk/bindings/perl/contributions/analysis/cjk/MANIFEST =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/MANIFEST (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/MANIFEST 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,12 @@ +Changes +MANIFEST +META.yml # Will be created by "make dist" +Makefile.PL +README +lib/Lucene/Analysis/CJK.pm +LuceneAnalysisCJK.xs +cpp/utils.cpp +xs/CJKAnalyzer.xs +xs/CJKTokenizer.xs +t/00-load.t +typemap Added: trunk/bindings/perl/contributions/analysis/cjk/META.yml =================================================================== --- 
trunk/bindings/perl/contributions/analysis/cjk/META.yml (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/META.yml 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,12 @@ +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: Lucene-Analysis-CJK +version: 0.01 +version_from: lib/Lucene/Analysis/CJK.pm +installdirs: site +requires: + Lucene: 0.07 + Test::More: 0 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 Added: trunk/bindings/perl/contributions/analysis/cjk/Makefile.PL =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/Makefile.PL (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/Makefile.PL 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,50 @@ +use strict; +use warnings; +use ExtUtils::MakeMaker; + +my $author = 'Thomas Busch <tbusch at cpan dot org>'; + +## hash that specifies the directory to look for CLucene/clucene-config.h +## for each OS; set CLUCENE_CONFIG_DIR in the environment to override it +my %h_config_dirs = ( + "linux" => "/usr/lib", + "freebsd" => "/usr/local/lib", + "darwin" => "/usr/local/lib", +); + +my $config_dir = $ENV{CLUCENE_CONFIG_DIR} || $h_config_dirs{$^O}; + +if (!$config_dir) { + print "==========================================================\n"; + print "CLucene config directory for $^O unknown.\n"; + print "\n"; + print "Please modify Makefile.PL and send a mail to\n"; + print "$author with your modification.\n"; + print "\n"; + print "Thanks for your help.\n"; + print "==========================================================\n"; + exit(0); +} + +WriteMakefile( + NAME => 'Lucene::Analysis::CJK', + AUTHOR => $author, + VERSION_FROM => 'lib/Lucene/Analysis/CJK.pm', + PL_FILES => {}, + PREREQ_PM => { + 'Test::More' => 0, + 'Lucene' => 0.07, + }, + CC => 'g++', + LD => 'g++', + LIBS => ['-lstdc++ -lclucene -lclucene-contrib'], + + ## + INC => "-I$config_dir", + OBJECT => '$(O_FILES)', + + OPTIMIZE => ' ', + dist => { 
COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, + clean => { FILES => 'Lucene-*' }, +); + Added: trunk/bindings/perl/contributions/analysis/cjk/README =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/README (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/README 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,31 @@ +Lucene + +INSTALLATION + +This module requires the clucene library compiled in ASCII mode to be +present in your library path. In order to produce and install such a +library run the following commands: + + wget http://kent.dl.sourceforge.net/sourceforge/clucene/clucene-core-0.9.15.tar.gz + tar xzf clucene-core-0.9.15.tar.gz + cd clucene-core-0.9.15 + ./autogen.sh + ./configure --disable-debug --prefix=/usr --exec-prefix=/usr --enable-ascii + make + make check + sudo make install + +To install the perl module itself, run the following commands: + + perl Makefile.PL + make + make test + make install + + +COPYRIGHT AND LICENCE + +Copyright (C) 2006 Thomas Busch + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. 
Added: trunk/bindings/perl/contributions/analysis/cjk/cpp/utils.cpp =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/cpp/utils.cpp (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/cpp/utils.cpp 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,197 @@ + +/* Flag the hash behind obj as owned by C++: adds a "_cppowned" entry unless one is already present. */ +void +MarkObjCppOwned(SV *obj) +{ + HV *hv = (HV *) SvRV(obj); + SV **sv = hv_fetch(hv, "_cppowned", 9, 0); + if (!sv) + hv_store(hv, "_cppowned", 9, newSViv(1), 0); +} + +/* Return true when the hash behind obj has a "_cppowned" entry; the XS DESTROY methods only delete the C++ object when this is false. */ +bool +IsObjCppOwned(SV *obj) +{ + HV *hv = (HV *) SvRV(obj); + SV **sv = hv_fetch(hv, "_cppowned", 9, 0); + if (!sv) + return false; + return true; +} + +/* Wrap ptr for Perl: store it under "_objptr" in a new hash, bless a reference to that hash into CLASS, copy the reference into dest and return dest. */ +SV* +PtrToSv(const char* CLASS, void* ptr, SV* dest) +{ + HV* new_hv = newHV(); + SV* tmp_rv = newRV_noinc((SV*) new_hv); + hv_store(new_hv, "_objptr", 7, newSViv(PTR2IV(ptr)), 0); + sv_setsv(dest, sv_bless(tmp_rv, gv_stashpv(CLASS, 1))); + SvREFCNT_dec((SV*) tmp_rv); + return dest; +} + +/* Inverse of PtrToSv: return the pointer stored under "_objptr" in a blessed hash reference. Warns and returns NULL when src is not a blessed hash reference, the key is missing or the stored pointer is NULL. */ +template <class T> +T +SvToPtr(SV* src) +{ + T var = NULL; + if (sv_isobject(src) && SvTYPE(SvRV(src)) == SVt_PVHV) { + HV *hv = (HV *) SvRV(src); + SV **sv = hv_fetch(hv, "_objptr", 7, 0); + if (sv) { + var = INT2PTR(T, SvIV(*sv)); + if (!var) { + warn("SvToPtr(): C++ object pointer is NULL"); + } + } else { + warn("SvToPtr(): key _objptr is missing"); + } + } else { + warn("SvToPtr(): not a blessed hash reference"); + } + return var; +} + +/* Convert a Perl scalar to a NUL-terminated wide string allocated with Newz (UTF-8 decoded when the SV carries the UTF-8 flag, byte-for-byte otherwise); conversion stops at the first NUL byte and the caller frees the result. */ +wchar_t* +SvToWChar(SV* arg) +{ + wchar_t* ret; + // Get string length of argument. This works for PV, NV and IV. + // The STRLEN typedef is needed to ensure that this will work correctly + // in a 64-bit environment. + STRLEN arg_len; + SvPV(arg, arg_len); + + // Alloc memory for wide char string. This could be a bit more + // than necessary. 
+ Newz(0, ret, arg_len + 1, wchar_t); + + U8* src = (U8*) SvPV_nolen(arg); + wchar_t* dst = ret; + + if (SvUTF8(arg)) { + // UTF8 to wide char mapping + STRLEN len; + while (*src) { + *dst++ = utf8_to_uvuni(src, &len); + src += len; + } + } else { + // char to wide char mapping + while (*src) { + *dst++ = (wchar_t) *src++; + } + } + *dst = 0; + return ret; +} + +/* Encode the NUL-terminated wide string src as UTF-8 into dest, flag dest as UTF-8 via sv_utf8_decode and return dest. */ +SV* +WCharToSv(wchar_t* src, SV* dest) +{ + U8* dst; + U8* d; + + // Alloc memory for the UTF-8 bytes: reserve UTF8_MAXBYTES per wide char, the + // most one uvuni_to_utf8() call can write (3 bytes are too few above U+FFFF). + Newz(0, dst, UTF8_MAXBYTES * wcslen(src) + 1, U8); + + d = dst; + while (*src) { + d = uvuni_to_utf8(d, *src++); + } + *d = 0; + + sv_setpv(dest, (char*) dst); + sv_utf8_decode(dest); + + Safefree(dst); + return dest; +} + + +/* Used by the INPUT typemap for char**. + * Will convert a Perl AV* (containing strings) to a C char**. + */ +char ** XS_unpack_charPtrPtr(SV* rv ) +{ + AV *av; + SV **ssv; + char **s; + int avlen; + int x; + + if( SvROK( rv ) && (SvTYPE(SvRV(rv)) == SVt_PVAV) ) + av = (AV*)SvRV(rv); + else { + warn("XS_unpack_charPtrPtr: rv was not an AV ref"); + return( (char**)NULL ); + } + + /* is it empty? */ + avlen = av_len(av); + if( avlen < 0 ){ + warn("XS_unpack_charPtrPtr: array was empty"); + return( (char**)NULL ); + } + + /* av_len+2 == number of strings, plus 1 for an end-of-array sentinel. 
+ */ + s = (char **)safemalloc( sizeof(char*) * (avlen + 2) ); + if( s == NULL ){ + warn("XS_unpack_charPtrPtr: unable to malloc char**"); + return( (char**)NULL ); + } + for( x = 0; x <= avlen; ++x ){ + ssv = av_fetch( av, x, 0 ); + if( ssv != NULL ){ + if( SvPOK( *ssv ) ){ + s[x] = (char *)safemalloc( SvCUR(*ssv) + 1 ); + if( s[x] == NULL ) + warn("XS_unpack_charPtrPtr: unable to malloc char*"); + else + strcpy( s[x], SvPV( *ssv, PL_na ) ); + } + else { + warn("XS_unpack_charPtrPtr: array elem %d was not a string.", x ); + s[x] = (char*)NULL; /* never leave the slot uninitialized; like a missing element it reads as end-of-list */ + } + } + else + s[x] = (char*)NULL; + } + s[x] = (char*)NULL; /* sentinel */ + return( s ); +} + +/* Will convert a C char** to a Perl AV* */ +void XS_pack_charPtrPtr(SV* st, char **s) +{ + AV *av = newAV(); + SV *sv; + char **c; + + for( c = s; *c != NULL; ++c ){ + sv = newSVpv( *c, 0 ); + av_push( av, sv ); + } + sv = newSVrv( st, NULL ); /* upgrade stack SV to an RV */ + SvREFCNT_dec( sv ); /* discard */ + SvRV( st ) = (SV*)av; /* make stack RV point at our AV */ +} + + +/* cleanup the temporary char** from XS_unpack_charPtrPtr */ +void XS_release_charPtrPtr(char **s) +{ + char **c; + for( c = s; *c != NULL; ++c ) + safefree( *c ); + safefree( s ); +} + Added: trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/Analysis/CJK.pm =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/Analysis/CJK.pm (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/lib/Lucene/Analysis/CJK.pm 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,100 @@ +package Lucene::Analysis::CJK; +require DynaLoader; +require Exporter; + +use 5.006; +use warnings; +use strict; + +our $VERSION = '0.01'; +our @ISA = qw( Exporter DynaLoader ); +our %EXPORT_TAGS = ( 'all' => [ qw() ] ); +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); +bootstrap Lucene::Analysis::CJK $VERSION; + +# This flag is necessary so that external variables get exported +# On Linux this corresponds to RTLD_GLOBAL of the function 
dlopen +sub dl_load_flags { 0x01 } + +1; # End of Lucene::Analysis::CJK + +=head1 NAME + +Lucene::Analysis::CJK -- Lucene Analysis classes for CJK languages + +=head1 SYNOPSIS + +=head2 Load CJK analysis classes into namespace + + use Lucene::Analysis::CJK; + +=head1 DESCRIPTION + +Lucene::Analysis::CJK is an extension of the original Lucene package and provides +an Analyzer and a Tokenizer specific for Chinese, Japanese and Korean (CJK). + +=head1 REQUIREMENTS + +This module requires L<Lucene> to be installed. + +=head1 INSTALLATION + +This module requires the clucene contrib library to be installed. The best way to +get it is to go to the following page + + http://sourceforge.net/projects/clucene/ + +and download the latest clucene-contrib version. Currently it is clucene-contrib-0.9.14. +Make sure you compile it with debug disabled and install it in your standard library path. + +On a Linux platform this goes as follows: + + wget http://kent.dl.sourceforge.net/sourceforge/clucene/clucene-contrib-0.9.14.tar.gz + tar xzf clucene-contrib-0.9.14.tar.gz + cd clucene-contrib-0.9.14 + ./autogen.sh + ./configure --disable-debug --prefix=/usr --exec-prefix=/usr + make + make check + (as root) make install + +To install the perl module itself, run the following commands: + + perl Makefile.PL + make + make test + (as root) make install + +=head1 AUTHOR + +Thomas Busch <tbusch at cpan dot org> + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2008 Thomas Busch + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 DISCLAIMER OF WARRANTY + +BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE +SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE +SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, +YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY +COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE +SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO +LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR +THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER +SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ +=cut Added: trunk/bindings/perl/contributions/analysis/cjk/t/00-load.t =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/t/00-load.t (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/t/00-load.t 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,9 @@ +#!perl -T + +use Test::More tests => 1; + +BEGIN { + use_ok( 'Lucene::Analysis::CJK' ); +} + +diag( "Testing Lucene::Analysis::CJK $Lucene::Analysis::CJK::VERSION, Perl $], $^X" ); Added: trunk/bindings/perl/contributions/analysis/cjk/typemap =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/typemap (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/typemap 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,102 @@ +TYPEMAP +Reader * T_CPP_CLASS +CJKTokenizer * T_CPP_CLASS +CJKAnalyzer * T_CPP_CLASS + +int32_t T_IV +const int32_t T_IV +int64_t T_IV +wchar_t ** T_WCHAR_ARRAY +const wchar_t ** T_WCHAR_ARRAY +wchar_t_keepalive ** T_WCHAR_ARRAY_KEEPALIVE +const wchar_t_keepalive ** T_WCHAR_ARRAY_KEEPALIVE +wchar_t * T_WCHAR +const wchar_t * T_WCHAR +wchar_t T_WCH +const wchar_t T_WCH + + + +INPUT +T_IV + $var = ($type)SvIV($arg) + +T_CPP_CLASS + { + $var = SvToPtr<$type>($arg); + if (!$var) + XSRETURN_UNDEF; + } + +T_WCHAR + $var = SvToWChar($arg); + SAVEFREEPV($var); + +T_WCH + { + wchar_t *ch = SvToWChar($arg); + $var = ch[0]; + Safefree(ch); + } + +T_WCHAR_ARRAY + { + int i = 0; + int inc = 1; + char **words = XS_unpack_charPtrPtr($arg); + Newz(0, $var, 100, wchar_t*); + while (words && words[i]) { /* words is NULL when the argument is not a non-empty array reference */ + ${var}[i] = STRDUP_AtoW(words[i]); + ++i; + if (!(i % 100)) { + ++inc; + Renew($var, (100 * inc), wchar_t*); + } + } + ${var}[i] = NULL; + SAVEFREEPV($var); + } + +T_WCHAR_ARRAY_KEEPALIVE + { + int i = 0; + int inc = 1; + char **words = XS_unpack_charPtrPtr($arg); + Newz(0, $var, 100, wchar_t*); + while (words && words[i]) { /* words is NULL when the argument is not a non-empty array reference */ + ${var}[i] = STRDUP_AtoW(words[i]); + ++i; + if (!(i % 
100)) { + ++inc; + Renew($var, (100 * inc), wchar_t*); + } + } + ${var}[i] = NULL; + } + +OUTPUT +T_IV + sv_setiv($arg, (IV)$var); +T_CPP_CLASS + { + if (!$var) + XSRETURN_UNDEF; + PtrToSv(CLASS, (void*)$var, $arg); + } + +T_WCHAR + { + wchar_t* src; + if (!$var) + XSRETURN_UNDEF; + src = (wchar_t*) $var; + WCharToSv(src, (SV*)$arg); + } + +T_WCH + { + wchar_t src[2]; + src[0] = (wchar_t)$var; + src[1] = 0; + WCharToSv(src, (SV*)$arg); + } Added: trunk/bindings/perl/contributions/analysis/cjk/xs/CJKAnalyzer.xs =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/xs/CJKAnalyzer.xs (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/xs/CJKAnalyzer.xs 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,14 @@ +CJKAnalyzer * +new(CLASS) +const char* CLASS; + CODE: + RETVAL = new CJKAnalyzer(); + OUTPUT: + RETVAL + +void +DESTROY(self) + CJKAnalyzer * self + CODE: + if (!IsObjCppOwned(ST(0))) + delete self; Added: trunk/bindings/perl/contributions/analysis/cjk/xs/CJKTokenizer.xs =================================================================== --- trunk/bindings/perl/contributions/analysis/cjk/xs/CJKTokenizer.xs (rev 0) +++ trunk/bindings/perl/contributions/analysis/cjk/xs/CJKTokenizer.xs 2008-01-31 18:12:34 UTC (rev 2607) @@ -0,0 +1,20 @@ +CJKTokenizer* +new(CLASS, reader) + const char* CLASS + Reader* reader + CODE: + RETVAL = new CJKTokenizer(reader); + OUTPUT: + RETVAL + CLEANUP: + // Memorize Reader in returned blessed hash reference. + // We don't want it to be destroyed by perl before the C++ object it + // contains gets destroyed by C++. Otherwise this would cause a seg fault. + hv_store((HV *) SvRV(ST(0)), "Reader", 6, newRV(SvRV(ST(1))), 0); /* 0 = let perl compute the key hash */ + +void +DESTROY(self) + CJKTokenizer* self + CODE: + if (!IsObjCppOwned(ST(0))) + delete self; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |