From: Bill M. <whm...@us...> - 2004-05-01 01:24:25
|
Update of /cvsroot/swishe/swish-e/perl In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22856/perl Modified Files: API.pm API.xs typemap Log Message: Here's Jamie Herre's patch from March 2003 that provides access to the list of metas and properties in the index. I also added methods to the SWISH::API module to access this info. TODO -- add reference counting to the swish handle to avoid early destroy. Index: API.pm =================================================================== RCS file: /cvsroot/swishe/swish-e/perl/API.pm,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- API.pm 9 Mar 2004 20:13:22 -0000 1.9 +++ API.pm 1 May 2004 01:24:16 -0000 1.10 @@ -31,14 +31,14 @@ # A short-cut way to search - + my $results = $swish->Query( "foo OR bar" ); # Or more typically my $search = $swish->New_Search_Object; - # then in a loop + # then in a loop my $results = $search->Execute( $query ); # always check for errors (but aborting is not always necessary) @@ -75,6 +75,20 @@ ); } + # display properties and metanames + + for my $index_name ( $swish->IndexNames ) { + my @metas = $swish->MetaList( $index_name ); + my @props = $swish->PropertyList( $index_name ); + + for my $m ( @metas ) { + my $name = $m->Name; + my $id = $m->ID; + my $type = $m->Type; + } + # (repeat above for @props) + } + =head1 DESCRIPTION @@ -140,7 +154,7 @@ =item @values = $swish->HeaderValue( $index_file, $header_name ); A swish-e index has data associated with it stored in the index header. This method -provides access to that data. +provides access to that data. Returns the header value for the header and index file specified. Most headers are a single item, but some headers (e.g. "Stopwords") return a list. @@ -261,7 +275,7 @@ IN_HEAD = 4 In <head> tag IN_BODY = 8 In <body> IN_COMMENTS = 16 In html comments - IN_HEADER = 32 In <h*> + IN_HEADER = 32 In <h*> IN_EMPHASIZED = 64 In <em>, <b>, <strong>, <i> IN_META = 128 In a meta tag (e.g. 
not swishdefault) @@ -269,7 +283,7 @@ or in the E<lt>titleE<gt> tag use: $search->SetStructure( IN_HEAD | IN_TITLE ); - + =item $search->PhraseDelimiter( $char ); @@ -289,7 +303,7 @@ my $start = time - 48 * 60 * 60; $search->SetSearchLimit( 'swishlastmodified', $start, time() ); -An error will be set if the property +An error will be set if the property has already been specified or if $high > $low. Other errors may not be reported until running the query, such as @@ -426,6 +440,52 @@ =over 4 +=item @metas = $swish->MetaList( $index_name ); + +Swish-e has "MetaNames" which allow searching by fields in the index. +This method returns information about the Metanames. + +Pass in the name of an open index file name and returns a +list of SWISH::API::MetaName objects. Three methods are currently +defined on these objects: + + $meta->Name; + $meta->ID; + $meta->Type; + +Name returns the name of the meta as defined in the MetaNames +config option when the index was created. + +The ID is the internal ID number used to represent the meta name. + +Type is the type of metaname. Currently only one type exists and its +value is zero. + +=item @props = $swish->PropertyList( $index_name ); + +Swish-e can store content or "properties" in the index and return this data +when running a query. +A document's path, URL, title, size, date or summary are examples +of properties. Each property is accessed via its PropertyName. +This method returns information about the PropertyNames stored in the index. + +Pass in the name of an open index file name and returns a +list of SWISH::API::PropertyName objects. Three methods are currently +defined on these objects: + + $prop->Name; + $prop->ID; + $prop->Type; + +Name returns the name of the property as defined in the PropertyNames +config option when the index was created. + +The ID is the internal ID number used to represent the property name. + +Type is the type of metaname. Currently only one type exists and its +value is zero. 
+ + =item $stemmed_word = $swish->StemWord( $word ); *Deprecated* @@ -450,7 +510,7 @@ =item $count = $fuzzy_word->WordCount; Returns the number of output words. Normally this is the value one, but may -be more depending on the stemmer used. DoubleMetaphone can return two strings +be more depending on the stemmer used. DoubleMetaphone can return two strings for a single input string. =item $status = $fuzzy_word->WordError; @@ -483,7 +543,7 @@ but care must be taken not to keep objects around too long which can use up memory. Here's an example of a potential problem. Say you have a very large number -of documents indexed and you want to find the first hit for a number of +of documents indexed and you want to find the first hit for a number of popular keywords (error checking omitted in this bad example): sub first_hit { @@ -520,7 +580,7 @@ return $first_hit->Property('swishdocpath'); } -Then when first_hit() sub ends the result list will be freed, and the +Then when first_hit() sub ends the result list will be freed, and the index file closed, thanks to Perl's reference count tracking. 
Note: the other problem with the above code is that the same index file is Index: API.xs =================================================================== RCS file: /cvsroot/swishe/swish-e/perl/API.xs,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- API.xs 9 Mar 2004 20:13:22 -0000 1.10 +++ API.xs 1 May 2004 01:24:16 -0000 1.11 @@ -192,7 +192,7 @@ SW_HANDLE self -# Return a search object +# Return a search object (uses a typemap to bless the return object) SW_SEARCH New_Search_Object(swish_handle, query = NULL) @@ -234,6 +234,90 @@ } +# Methods to return info about MetaNames and Properties +# The C API provided by Jamie Herre in March 2004 + +# Returns an array of SWISH::API::MetaName objects + +void +SwishMetaList( swish_handle, index_name ) + SW_HANDLE swish_handle + char *index_name + + PREINIT: + char * CLASS = "SWISH::API::MetaName"; + SWISH_META_LIST meta_list; + + PPCODE: + /* Grab the list of pointers */ + meta_list = SwishMetaList( swish_handle, index_name ); + + /* Check for an error -- typically this would be an invalid index name */ + /* Fix: calling with an invalid swish_handle will call progerr */ + if ( SwishError( swish_handle ) ) + croak("%s %s", SwishErrorString( swish_handle ), SwishLastErrorMsg( swish_handle ) ); + + /* Make sure a list is returned and it's not empty */ + if ( !meta_list || !*meta_list ) + XSRETURN_UNDEF; + + + while ( *meta_list ) + { + /* Create a new object */ + SV *meta = sv_newmortal(); + sv_setref_pv( meta, CLASS, (void *)*meta_list ); + + /* and push onto list */ + XPUSHs( meta ); + meta_list++; + + /* $$$ Need to bump the handle ref count here */ + } + + +# Returns an array of SWISH::API::PropertyName objects + +void +SwishPropertyList( swish_handle, index_name ) + SW_HANDLE swish_handle + char *index_name + + PREINIT: + char * CLASS = "SWISH::API::PropertyName"; + SWISH_META_LIST meta_list; + + PPCODE: + /* Grab the list of pointers */ + meta_list = SwishPropertyList( swish_handle, 
index_name ); + + /* Check for an error -- typically this would be an invalid index name */ + /* Fix: calling with an invalid swish_handle will call progerr */ + if ( SwishError( swish_handle ) ) + croak("%s %s", SwishErrorString( swish_handle ), SwishLastErrorMsg( swish_handle ) ); + + /* Make sure a list is returned and it's not empty */ + if ( !meta_list || !*meta_list ) + XSRETURN_UNDEF; + + + while ( *meta_list ) + { + /* Create a new object */ + SV *meta = sv_newmortal(); + sv_setref_pv( meta, CLASS, (void *)*meta_list ); + + /* and push onto list */ + XPUSHs( meta ); + meta_list++; + + /* $$$ Need to bump the handle ref count here */ + } + + + + + # Misc utility routines void @@ -658,3 +742,55 @@ list++; } +# ******************************************************************** +# +# SWISH::API::MetaName +# +# Methods for accessing data about metanames +# +# ******************************************************************** + +MODULE = SWISH::API PACKAGE = SWISH::API::MetaName PREFIX = SwishMeta + +# Need a DESTROY method to reduce the swish_hande ref count. + + +const char * +SwishMetaName( meta ) + SW_META meta + +int +SwishMetaType( meta ) + SW_META meta + +int +SwishMetaID( meta ) + SW_META meta + + +# ******************************************************************** +# +# SWISH::API::PropertyName +# +# Methods for accessing data about metanames +# Should set a base class for both, but they are small classes +# and may want different behavior in the future. +# +# ******************************************************************** + +MODULE = SWISH::API PACKAGE = SWISH::API::PropertyName PREFIX = SwishMeta + +# Need a DESTROY method to reduce the swish_hande ref count. 
+ + +const char * +SwishMetaName( meta ) + SW_META meta + +int +SwishMetaType( meta ) + SW_META meta + +int +SwishMetaID( meta ) + SW_META meta Index: typemap =================================================================== RCS file: /cvsroot/swishe/swish-e/perl/typemap,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- typemap 9 Mar 2004 20:13:22 -0000 1.3 +++ typemap 1 May 2004 01:24:16 -0000 1.4 @@ -6,6 +6,7 @@ SW_RESULTS O_OBJECT SW_RESULT O_OBJECT SW_FUZZYWORD O_OBJECT +SW_META O_OBJECT const char * T_PV # From: "perlobject.map" Dean Roehrich, version 19960302 |