From: <ik...@us...> - 2009-09-09 00:37:48
|
Revision: 56 http://webfetch.svn.sourceforge.net/webfetch/?rev=56&view=rev Author: ikluft Date: 2009-09-09 00:37:27 +0000 (Wed, 09 Sep 2009) Log Message: ----------- overhaul API Modified Paths: -------------- branches/v0.13/lib/WebFetch/Input/Atom.pm branches/v0.13/lib/WebFetch/Input/PerlStruct.pm branches/v0.13/lib/WebFetch/Input/RSS.pm branches/v0.13/lib/WebFetch/Input/SiteNews.pm branches/v0.13/lib/WebFetch/Output/TT.pm branches/v0.13/lib/WebFetch/Output/TWiki.pm branches/v0.13/lib/WebFetch.pm Added Paths: ----------- branches/v0.13/lib/WebFetch/Data/ branches/v0.13/lib/WebFetch/Data/Record.pm branches/v0.13/lib/WebFetch/Data/Store.pm Added: branches/v0.13/lib/WebFetch/Data/Record.pm =================================================================== --- branches/v0.13/lib/WebFetch/Data/Record.pm (rev 0) +++ branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -0,0 +1,189 @@ +# +# WebFetch::Data::Record - WebFetch Embedding API data record +# +# Copyright (c) 2009 Ian Kluft. This program is free software; you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License Version 3. See http://www.webfetch.org/GPLv3.txt +# + +package WebFetch::Data::Record; + +use strict; +use warnings; +use base qw( WebFetch ); + +# define exceptions/errors +use Exception::Class ( + 'WebFetch::Data::Record::Exception::AutoloadFailure' => { + isa => 'WebFetch::TracedException', + alias => 'throw_autoload_fail', + description => "AUTOLOAD failed to handle function call", + }, + +); + +# no user-servicable parts beyond this point + +=head1 NAME + +WebFetch::Data::Record - Object for management of WebFetch data records/rows + +=head1 SYNOPSIS + +C<use WebFetch::Data::Record; + +WebFetch::Data::Record->mk_field_accessor( $field_name, ... ); +$value = $obj-E<gt>bynum( $num ); +$value = $obj->fieldname; +$obj->fieldname( $value ); +> + +=head1 DESCRIPTION + +This module provides read-only access to a single record of the WebFetch data. + +=cut + +our $AUTOLOAD; + +# initialization +sub init +{ + my $self = shift; + + # save parameters + $self->{obj} = shift; + $self->{num} = shift; + $self->{recref} = $self->{obj}{records}[$self->{num}]; + + # signal WebFetch that Data subclasses do not provide a fetch function + $self->{no_fetch} = 1; + $self->SUPER::init( @_ ); + + # make accessor functions + my $field; + my $class = ref( $self ); + foreach $field ( @{$self->{obj}{fields}}) { + $class->mk_field_accessor( $field ); + } + foreach $field ( keys %{$self->{obj}{wk_names}}) { + $class->mk_field_accessor( $field ); + } + + return $self; +} + +# shortcut function to top-level WebFetch object data +sub data { return $_[0]->{obj}->data; } + +=item $value = $obj->bynum( $field_num ); + +Returns the value of the field located by the field number provided. +The first field is numbered 0. + +=cut + +# get a field by number +sub bynum +{ + my $self = shift; + my $f = shift; + my $num = $self->{num}; + + return $self->{recref}[$f]; +} + +=item $class->mk_field_accessor( $field_name, ... ); + +Creates accessor functions for each field name provided. + +=cut + +# make field accessor/mutator functions +sub mk_field_accessor +{ + my $class = shift; + my $name; + + foreach $name ( @_ ) { + no strict 'refs'; + $class->can( $name ) and next; # skip if function exists! + + # make a closure which keeps value of $name from this call + # keep generic so code can use more than one data type per run + *{$class."::".$name} = sub { + my $self = shift; + my $value = shift; + my $obj = $self->{obj}; + my $recref = $self->{recref}; + my $f; + if ( exists $obj->{findex}{$name}) { + $f = $obj->{findex}{$name}; + if ( defined $value ) { + my $tmp = $recref->[$f]; + $recref->[$f] = $value; + return $tmp; + } else { + return $recref->[$f]; + } + } elsif ( exists $obj->{wk_names}{$name}) { + my $wk = $obj->{wk_names}{$name}; + $f = $obj->{findex}{$wk}; + if ( defined $value ) { + my $tmp = $recref->[$f]; + $recref->[$f] = $value; + return $tmp; + } else { + return $recref->[$f]; + } + } else { + return undef; + } + }; + } +} + +=item accessor functions + +Accessor functions are created for field names and +well-known names as they are defined. + +So a field named "title" can be accessed by an object method of the same +name, like $obj->title . + +=cut + +# AUTOLOAD function to provide field accessors/mutators +sub AUTOLOAD +{ + my $self = shift; + my $type = ref($self) or throw_autoload_fail "self is not an object"; + + my $name = $AUTOLOAD; + $name =~ s/.*://; # strip fully-qualified portion, just want function + + # decline all-caps names - reserved for special Perl functions + ( $name =~ /^[A-Z]+$/ ) and return; + + WebFetch::debug __PACKAGE__."::AUTOLOAD $name"; + if (( exists $self->{obj}{findex}{$name}) + or ( exists $self->{obj}{wk_names}{$name})) + { + $type->mk_field_accessor( $name ); + return $self->$name(@_); + } else { + throw_autoload_fail "no such function or field $name"; + } +} + +1; +__END__ +=head1 AUTHOR + +WebFetch was written by Ian Kluft +Send patches, bug reports, suggestions and questions to +C<ma...@we...>. + +=head1 SEE ALSO + +L<WebFetch>, L<WebFetch::Data::Record> Added: branches/v0.13/lib/WebFetch/Data/Store.pm =================================================================== --- branches/v0.13/lib/WebFetch/Data/Store.pm (rev 0) +++ branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -0,0 +1,356 @@ +# +# WebFetch::Data::Store - WebFetch Embedding API top-level data store +# +# Copyright (c) 2009 Ian Kluft. This program is free software; you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License Version 3. See http://www.webfetch.org/GPLv3.txt +# +# The WebFetch Embedding API manages the following data: +# * {data} - top level hash container (WebFetch::Data::Store) +# * {fields} - array of field names +# * {records} - array of data records (WebFetch::Data::Record) +# * each record is an array of data fields in the order of the field names +# * {wk_names} - hash of WebFetch well-known fields to actual field names +# * {feed} - top-level arbitrary info about the feed +# + +package WebFetch::Data::Store; + +use strict; +use warnings; +use WebFetch; +use base qw( WebFetch ); + +# define exceptions/errors +use Exception::Class ( +); + +# no user-servicable parts beyond this point + +=head1 NAME + +WebFetch::Data::Store - Object for management of WebFetch data + +=head1 SYNOPSIS + +C<use WebFetch::Data::Store; + +$data = webfetch_obj-E<gt>data; +$data-E<gt>add_fields( "field1", "field2", ... ); +$num = $data-E<gt>num_fields; +@field_names = $data-E<gt>get_fields; +$name = $data-E<gt>field_bynum( 3 ); +$data-E<gt>add_wk_names( "title" =E<gt> "heading", "url" =E<gt> "link", ... ); +$value = $data-E<gt>get_feed( $name ); +$data-E<gt>set_feed( $name, $value ); +$data-E<gt>add_record( $field1, $field2, ... ); # order corresponds to add_fields +$num = $data-E<gt>num_records; +$record = $data-E<gt>get_record( $n ); +$data-E<gt>reset_pos; +$record = $data-E<gt>next_record; +$name = $data-E<gt>wk2fname( $wk_name ); +$num = $data-E<gt>fname2fnum( $field_name ); +$num = $data-E<gt>wk2fnum( $wk_name ); +> + +=head1 DESCRIPTION + +This module provides access to the WebFetch data. +WebFetch instantiates the object for the input module. +The input module uses this to construct the data set from its input. +The output module uses the this to access the data and +produce its output object/format. + +=cut + +# initialization +sub init +{ + my $self = shift; + $self->{fields} = []; + $self->{findex} = {}; + $self->{records} = []; + $self->{wk_names} = {}; + $self->{wkindex} = {}; + $self->{feed} = {}; + + # signal WebFetch that Data subclasses do not provide a fetch function + $self->{no_fetch} = 1; + $self->SUPER::init( @_ ); + + return $self; +} + +=item $obj->add_fields( "field1", "field2", ... ); + +Add the field names in the order their values will appear in the data table. + +=cut + +# add field names +sub add_fields +{ + my $self = shift; + my @fields = @_; + my $field; + foreach $field ( @fields ) { + $self->{findex}{$field} = scalar @{$self->{fields}}; + push @{$self->{fields}}, $field; + } +} + +=item $num = $obj->num_fields; + +Returns the number of fields/columns in the data. + +=cut + +# get number of fields +sub num_fields +{ + my $self = shift; + return scalar @{$self->{fields}}; +} + +=item @field_names = $obj->get_fields; + +Gets a list of the field names in the order their values appear in the data +table; + +=cut + +# get field names +sub get_fields +{ + my $self = shift; + return keys %{$self->{fields}}; +} + +=item $field_name = $obj->field_bynum( $num ); + +Return a field name string based on the numeric position of the field. + +=cut + +# get field name by number +sub field_bynum +{ + my $self = shift; + my $num = shift; + return $self->{fields}[$num]; +} + +=item $obj->add_wk_names( "title" => "heading", "url" => "link", ... ); + +Add associations between WebFetch well-known field names, which allows +WebFetch to apply meaning to these fields, such as titles, dates and URLs. +The parameters are pairs of well-known and actual field names. +Running this function more than once will add to the existing associations +of well-known to actual field names. + +=cut + +# add well-known names +sub add_wk_names +{ + my $self = shift; + my ( $wk_name, $field ); + + while ( @_ >= 2 ) { + $wk_name = shift; + $field = shift; + WebFetch::debug "add_wk_names $wk_name => $field"; + $self->{wk_names}{$wk_name} = $field; + $self->{wkindex}{$wk_name} = $self->{findex}{$field}; + } +} + +=item $value = $obj->get_feed( $name ); + +Get an item of per-feed data by name. + +=cut + +# get feed info +sub get_feed +{ + my $self = shift; + my $name = shift; + return (exists $self->{$name}) ? $self->{$name} : undef; +} + +=item $obj->set_feed( $name, $value ); + +Set an item of per-feed data by name and value. + +=cut + +# set feed info +sub set_feed +{ + my $self = shift; + my $name = shift; + my $value = shift; + my $retval = (exists $self->{$name}) ? $self->{$name} : undef; + $self->{$name} = $value; + return $retval; +} + +=item $obj->add_record( $value1, $value2, $value3, ... ); + +Add a row to the end of the data table. Values must correspond to the +positions of the field names that were provided earlier. + +=cut + +# add a data record +# this adds the field values in the same order the field names were added +sub add_record +{ + my $self = shift; + push @{$self->{records}}, [ @_ ]; +} + +# TODO: add a function add_record_unordered( name => value, ... ) +# less efficient, but may be OK for cases where that doesn't matter + +=item $num = $obj->num_records; + +Get the number of records/rows in the data table. + +=cut + +# get the number of data records +sub num_records +{ + my $self = shift; + return scalar @{$self->{records}}; +} + +=item $record = get_record( $num ); + +Returns a WebFetch::Data::Record object for the row located +by the given row number in the data table. The first row is numbered 0. +Calling this function does not affect the position used by the next_record +function. + +=cut + +# get a data record by index +sub get_record +{ + my $self = shift; + my $n = shift; + WebFetch::debug "get_record $n"; + require WebFetch::Data::Record; + return WebFetch::Data::Record->new( $self, $n ); +} + +=item $obj->reset_pos; + +Reset the position counter used by the next_record function back to the +beginning of the data table. + +=cut + +# reset iterator position +sub reset_pos +{ + my $self = shift; + + WebFetch::debug "reset_pos"; + delete $self->{pos}; +} + +=item $record = $obj->next_record; + +The first call to this function returns the first record. +Each successive call to this function returns the following record until +the end of the data table. +After the last record, the function returns undef until +reset_pos is called to reset it back to the beginning. + +=cut + +# get next record +sub next_record +{ + my $self = shift; + + # initialize if necessary + if ( !exists $self->{pos}) { + $self->{pos} = 0; + } + WebFetch::debug "next_record n=".$self->{pos}." of " + .scalar @{$self->{records}}; + + # return undef if position is out of bounds + ( $self->{pos} < 0 ) and return undef; + ( $self->{pos} > scalar @{$self->{records}} - 1 ) and return undef; + + # get record + return $self->get_record( $self->{pos}++ ); +} + +=item $obj->wk2fname( $wk ) + +Obtain a field name from a well-known name. + +=cut + +# convert well-known name to field name +sub wk2fname +{ + my ( $self, $wk ) = @_; + + WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef"); + return ( exists $self->{wk_names}{$wk}) + ? $self->{wk_names}{$wk} + : undef; +} + +=item $obj->fname2fnum( $fname ) + +Obtain a field number from a field name. + +=cut + +# convert a field name to a field number +sub fname2fnum +{ + my ( $self, $fname ) = @_; + + WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" ); + return ( exists $self->{findex}{$fname}) + ? $self->{findex}{$fname} + : undef; +} + +=item $obj->wk2fnum( $wk ) + +Obtain a field number from a well-known name. + +=cut + +# convert well-known name to field number +sub wk2fnum +{ + my ( $self, $wk ) = @_; + + WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" ); + return ( exists $self->{wkindex}{$wk}) + ? $self->{wkindex}{$wk} + : undef; +} + +1; +__END__ +=head1 AUTHOR + +WebFetch was written by Ian Kluft +Send patches, bug reports, suggestions and questions to +C<ma...@we...>. + +=head1 SEE ALSO + +L<WebFetch>, L<WebFetch::Data::Record> Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm =================================================================== --- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -80,21 +80,17 @@ my ( $self ) = @_; # set up Webfetch Embedding API data - $self->{data} = {}; - $self->{data}{fields} = [ "id", "updated", "title", "author", "link", - "summary", "content", "xml" ]; + $self->data->add_fields( "id", "updated", "title", "author", "link", + "summary", "content", "xml" ); # defined which fields match to which "well-known field names" - $self->{data}{wk_names} = { + $self->data->add_wk_names( "id" => "id", "title" => "title", "url" => "link", "date" => "updated", "summary" => "summary", - }; - $self->{data}{records} = []; + ); - # process the links - # parse data file $self->parse_input(); @@ -143,9 +139,8 @@ my $summary = extract_value( $entry->summary() ); my $content = extract_value( $entry->content() ); my $xml = $entry->as_xml(); - push @{$self->{data}{records}}, - [ $id, $updated, $title, $author, $link, $summary, - $content, $xml ]; + $self->data->add_record( $id, $updated, $title, + $author, $link, $summary, $content, $xml ); } } Modified: branches/v0.13/lib/WebFetch/Input/PerlStruct.pm =================================================================== --- branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -44,45 +44,21 @@ if ( !defined $self->{content}) { throw_nostruct "content struct does not exist"; } - if ( ref($self->{content}) eq "HASH" ) { + if ( ref($self->{content})->isa( "WebFetch::Data::Store" )) { + $self->{data} = $self->{content}; + return; + } elsif ( ref($self->{content}) eq "HASH" ) { if (( exists $self->{content}{fields}) and ( exists $self->{content}{records}) and ( exists $self->{content}{wk_names})) { - # it's already in WebFetch Embedding data format - $self->{data} = $self->{content}; + $self->data->{fields} = $self->{content}{fields}; + $self->data->{wk_names} = $self->{content}{wk_names}; + $self->data->{records} = $self->{content}{records}; return; } } - if ( ref($self->{content}) ne "ARRAY" ) { - throw_badstruct "content is not an ARRAY ref " - ."or WebFetch Embedding API data"; - } - - # build data structure - $self->{data} = {}; - $self->{data}{fields} = [ sort keys %{$self->{content}[0]}]; - $self->{data}{records} = []; - my ( $entry, $field ); - foreach $entry ( @{$self->{content}}) { - my @new_entry; - foreach $field ( @{$self->{data}{fields}}) { - push @new_entry, ( exists $entry->{$field}) - ? $entry->{$field} : ""; - } - push @{$self->{data}{records}}, \@new_entry; - } - - # map the well-known field names - if ( defined $self->{wk_names}) { - $self->{data}{wk_names} = $self->{wk_names}; - } else { - # if not provided, fake it the best we can - $self->{data}{wk_names} = {}; - foreach $field ( @{$self->{data}{fields}}) { - $self->{data}{wk_names}{$field} = $field; - } - } + throw_badstruct "content should be a WebFetch::Data::Store"; } 1; Modified: branches/v0.13/lib/WebFetch/Input/RSS.pm =================================================================== --- branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -54,18 +54,16 @@ my ( $self ) = @_; # set up Webfetch Embedding API data - $self->{data} = {}; - $self->{data}{fields} = [ "pubDate", "title", "link", "category", - "description" ]; + $self->data->add_fields( "pubDate", "title", "link", "category", + "description" ); # defined which fields match to which "well-known field names" - $self->{data}{wk_names} = { + $self->data->add_wk_names( "title" => "title", "url" => "link", "date" => "pubDate", "summary" => "description", "category" => "category", - }; - $self->{data}{records} = []; + ); # parse data file $self->parse_input(); @@ -163,8 +161,8 @@ ? $item->{category} : ""; my $description = ( defined $item->{description}) ? $item->{description} : ""; - push @{$self->{data}{records}}, - [ $pub_date, $title, $link, $category, $description ]; + $self->data->add_record( $pub_date, $title, $link, + $category, $description ); $pos++; } } Modified: branches/v0.13/lib/WebFetch/Input/SiteNews.pm =================================================================== --- branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -84,19 +84,17 @@ } # set up Webfetch Embedding API data - $self->{data} = {}; $self->{actions} = {}; - $self->{data}{fields} = [ "date", "title", "priority", "expired", - "position", "label", "url", "category", "text" ]; + $self->data->add_fields( "date", "title", "priority", "expired", + "position", "label", "url", "category", "text" ); # defined which fields match to which "well-known field names" - $self->{data}{wk_names} = { + $self->data->add_wk_names( "title" => "title", "url" => "url", "date" => "date", "summary" => "text", "category" => "category" - }; - $self->{data}{records} = []; + ); # process the links @@ -263,10 +261,10 @@ my $text = ( defined $item->{text}) ? $item->{text} : ""; my $url_prefix = ( defined $self->{url_prefix}) ? $self->{url_prefix} : ""; - push @{$self->{data}{records}}, - [ printstamp($posted), $title, priority( $item ), + $self->data->add_record( + printstamp($posted), $title, priority( $item ), expired( $item ), $pos, $label, - $url_prefix."#".$label, $category, $text ]; + $url_prefix."#".$label, $category, $text ); $pos++; } } Modified: branches/v0.13/lib/WebFetch/Output/TT.pm =================================================================== --- branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -80,7 +80,8 @@ my $template = Template->new( \%tt_config ); # process template - $template->process( $self->{template}, $self->{data}, \$output ) + $template->process( $self->{template}, { data => $self->{data}}, + \$output, { binmode => ':utf8'} ) or throw_template $template->error(); $self->raw_savable( $filename, $output ); Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm =================================================================== --- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -50,6 +50,11 @@ description => "WebFetch::Output::TWiki returned errors from " ."saving one or more entries", }, + "WebFetch::Output::TWiki::Exception::NoIdentifier" => { + isa => "WebFetch::Exception", + alias => "throw_no_id_field", + description => "no identifier field could be found", + }, ); =head1 NAME @@ -249,10 +254,13 @@ my $id_field = $self->wk2fnum( "id" ); if ( ! defined $id_field ) { $id_field = $self->wk2fnum( "url" ); - } elsif ( ! defined $id_field ) { + } + if ( ! defined $id_field ) { $id_field = $self->wk2fnum( "title" ); } - WebFetch::debug "write_to_twiki: id_field=$id_field"; + if ( ! defined $id_field ) { + throw_no_id_field "no usable identifier field was found"; + } # open DB file for tracking unique IDs of articles already processed my %id_index; @@ -272,13 +280,14 @@ # create topics with metadata from each WebFetch data record my $entry; my @oopses; - foreach $entry ( @{$self->{data}{records}}) { + $self->data->reset_pos; + while ( $entry = $self->data->next_record ) { # check that this entry hasn't already been forwarded to TWiki - if ( exists $id_index{$entry->[$id_field]}) { + if ( exists $id_index{$entry->bynum( $id_field )}) { next; } - $id_index{$entry->[$id_field]} = time; + $id_index{$entry->bynum( $id_field )} = time; # select topic name my $topicname = sprintf $tnum_format, $tnum_counter; @@ -298,20 +307,21 @@ { name => $config->{parent}}); $meta->put( "FORM", { name => $config->{form}}); my $fnum; - for ( $fnum = 0; $fnum <= @{$self->{data}{fields}}; $fnum++ ) { - WebFetch::debug "meta: ".$self->{data}{fields}[$fnum] - ." = ".$entry->[$fnum]; - ( defined $self->{data}{fields}[$fnum]) or next; - ( $self->{data}{fields}[$fnum] eq "xml") and next; - ( defined $entry->[$fnum]) or next; + for ( $fnum = 0; $fnum <= $self->data->num_fields; $fnum++ ) { + WebFetch::debug "meta: " + .$self->data->field_bynum($fnum) + ." = ".$entry->bynum($fnum); + ( defined $self->data->field_bynum($fnum)) or next; + ( $self->data->field_bynum($fnum) eq "xml") and next; + ( defined $entry->bynum($fnum)) or next; WebFetch::debug "meta: OK"; $meta->putKeyed( "FIELD", { - name => $self->{data}{fields}[$fnum], - value => $entry->[$fnum] }); + name => $self->data->field_bynum($fnum), + value => $entry->bynum($fnum)}); } # save a special title field for TWiki indexes - my $index_title = $entry->[$self->wk2fnum( "title" )]; + my $index_title = $entry->title; $index_title =~ s/[\t\r\n\|]+/ /gs; $index_title =~ s/^\s*//; $index_title =~ s/\s*$//; @@ -329,8 +339,8 @@ $topicname, $meta, $text ); if ( $oopsurl ) { WebFetch::debug "write_to_twiki: $topicname - $oopsurl"; - push @oopses, $entry->[$self->wk2fnum( "title" ) or 0] - ." -> ".$topicname." ".$oopsurl; + push @oopses, $entry->title." -> " + .$topicname." ".$oopsurl; } } Modified: branches/v0.13/lib/WebFetch.pm =================================================================== --- branches/v0.13/lib/WebFetch.pm 2009-09-07 21:51:29 UTC (rev 55) +++ branches/v0.13/lib/WebFetch.pm 2009-09-09 00:37:27 UTC (rev 56) @@ -100,12 +100,10 @@ use strict; -use Carp; use Getopt::Long; use LWP::UserAgent; use HTTP::Request; use Date::Calc; -use Data::Dumper; # define exceptions/errors use Exception::Class ( @@ -114,6 +112,12 @@ isa => 'WebFetch::Exception', }, + 'WebFetch::Exception::DataWrongType' => { + isa => 'WebFetch::TracedException', + alias => 'throw_data_wrongtype', + description => "provided data must be a WebFetch::Data::Store", + }, + 'WebFetch::Exception::GetoptError' => { isa => 'WebFetch::Exception', alias => 'throw_getopt_error', @@ -174,16 +178,16 @@ description => "no module was found to run the request", }, - 'WebFetch::Exception::AutoRunFailure' => { + 'WebFetch::Exception::AutoloadFailure' => { isa => 'WebFetch::TracedException', alias => 'throw_autoload_fail', - description => "AUTORUN failed to handle function call", + description => "AUTOLOAD failed to handle function call", }, ); # initialize class variables -our $VERSION = '0.13-pre29'; +our $VERSION = '0.13-pre39'; our %default_modules = ( "input" => { "rss" => "WebFetch::Input::RSS", @@ -558,7 +562,16 @@ # go fetch the data # this function must be provided by a derived module + # non-fetching modules (i.e. data) must define $self->{no_fetch}=1 if (( ! exists $self->{no_fetch}) or ! $self->{no_fetch}) { + require WebFetch::Data::Store; + if ( exists $self->{data}) { + $self->{data}->isa( "WebFetch::Data::Store" ) + or throw_data_wrongtype "object data must be " + ."a WebFetch::Data::Store"; + } else { + $self->{data} = WebFetch::Data::Store->new(); + } $self->fetch(); } @@ -990,7 +1003,7 @@ foreach $entry ( @{$self->{actions}{$action_spec}}) { # parameters must be in an ARRAY ref if (ref $entry ne "ARRAY" ) { - carp "warning: entry in action spec " + warn "warning: entry in action spec " ."\"".$action_spec."\"" ."expected to be ARRAY, found " .(ref $entry)." instead " @@ -1007,7 +1020,7 @@ # it will be reported by $self->save() } } else { - carp "warning: action \"$action_spec\" specified but " + warn "warning: action \"$action_spec\" specified but " ."\&{\$self->$action_handler}() " ."not defined in " .(ref $self)." - ignored\n"; @@ -1325,7 +1338,6 @@ if ( $self->{debug} ) { print STDERR "entering save()\n"; - #Dumper($self); } # check if we have attributes needed to proceed @@ -1344,10 +1356,11 @@ if (( exists $self->{fetch_urls}) and $self->{fetch_urls}) { my $url_fnum = $self->wk2fnum( "url" ); my $entry; - foreach $entry ( @{$self->{data}{records}}) { - if ( defined $entry->[$url_fnum]) { - $self->direct_fetch_savable( - $entry->[$url_fnum]); + $self->data->reset_pos; + while ( $entry = $self->data->next_record()) { + my $url = $entry->url; + if ( defined $url ) { + $self->direct_fetch_savable( $entry->url ); } } } @@ -1356,8 +1369,8 @@ my $savable; foreach $savable ( @{$self->{savable}}) { - if ( $self->{debug} ) { - print STDERR "saving ".$savable->{file}."\n"; + if ( exists $savable->{file}) { + debug "saving ".$savable->{file}."\n"; } # an output module may have handled a more intricate operation @@ -1431,9 +1444,8 @@ } # write content to the "new content" file - if ( ! open ( new_content, ">$new_content" )) { - $savable->{error} = "cannot open " - .$new_content.": $!"; + if ( ! open ( new_content, ">:utf8", "$new_content" )) { + $savable->{error} = "cannot open $new_content: $!"; next; } if ( !print new_content $savable->{content}) { @@ -1529,148 +1541,14 @@ } # -# functions to support format handlers +# shortcuts to data object functions # -# initialize an internal hash of field names to field numbers -sub init_fname2fnum -{ - my ( $self ) = @_; +sub data { my $self = shift; return $self->{data}; } +sub wk2fname { my $self = shift; return $self->data->wk2fname( @_ )}; +sub fname2fnum { my $self = shift; return $self->data->fname2fnum( @_ )}; +sub wk2fnum { my $self = shift; return $self->data->wk2fnum( @_ )}; - # check if fname2fnum is already initialized - if (( exists $self->{fname2fnum}) - and ref $self->{fname2fnum} eq "HASH" ) - { - # already done - success - return 1; - } - - # check if prerequisite data exists - if (( ! exists $self->{data} ) - or ( ! exists $self->{data}{fields})) - { - # missing prerequisites - failed - return 0; - } - - # initialize the fname2fnum hash - my $i; - $self->{fname2fnum} = {}; - for ( $i=0; $i < scalar(@{$self->{data}{fields}}); $i++ ) { - # put the field number in as the value for the hash - $self->{fname2fnum}{$self->{data}{fields}[$i]} = $i; - } - - # OK, done - return 1; -} - -# initialize an internal hash of well-known names to field numbers -sub init_wk2fnum -{ - my ( $self ) = @_; - - $self->init_fname2fnum() or return 0; - - # check if wk2fnum is already initialized - if (( exists $self->{wk2fnum}) - and ref $self->{wk2fnum} eq "HASH" ) - { - # already done - success - return 1; - } - - # check for prerequisite data - if ( ! exists $self->{data}{wk_names}) { - return 0; - } - - my $wk_key; - $self->{wk2fnum} = {}; - foreach $wk_key ( keys %{$self->{data}{wk_names}}) { - # perform consistency cross-check between wk_names and fields - if ( !exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}}) - { - # wk_names has a bad field name - carp about it! - carp "warning: wk_names contains $wk_key"."->" - .$self->{data}{wk_names}{$wk_key} - ." but " - .$self->{data}{wk_names}{$wk_key} - ." is not in the fields list - ignored\n"; - } else { - # it's OK - put it in the table - $self->{wk2fnum}{$wk_key} = - $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}}; - } - } - return 1; -} - -=item $obj->wk2fname( $wk ) - -=cut - -# convert well-known name to field name -sub wk2fname -{ - my ( $self, $wk ) = @_; - - $self->init_fname2fnum() or return undef; - - # check for prerequisite data - if (( ! exists $self->{data}{wk_names}) - or ( ! exists $self->{data}{wk_names}{$wk})) - { - return undef; - } - - # double check that the field exists before pronouncing it OK - # (perform consistency cross-check between wk_names and fields) - if ( exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk}}) { - return $self->{data}{wk_names}{$wk}; - } - - # otherwise, wk_names has a bad field name. - # But init_wk2fnum() may have already carped about it - # so check whether we need to carp about it or not. - if ( ! exists $self->{wk2fnum}) { - carp "warning: wk_names contains $wk"."->" - .$self->{data}{wk_names}{$wk} - ." but " - .$self->{data}{wk_names}{$wk} - ." is not in the fields list - ignored\n"; - } - return undef; -} - -=item $obj->fname2fnum( $fname ) - -=cut - -# convert a field name to a field number -sub fname2fnum -{ - my ( $self, $fname ) = @_; - - $self->init_fname2fnum() or return undef; - return ( exists $self->{fname2fnum}{$fname}) - ? $self->{fname2fnum}{$fname} : undef; -} - -=item $obj->wk2fnum( $wk ) - -=cut - -# convert well-known name to field number -sub wk2fnum -{ - my ( $self, $wk ) = @_; - - $self->init_wk2fnum() or return undef; - return ( exists $self->{wk2fnum}{$wk}) - ? $self->{wk2fnum}{$wk} : undef; -} - =item AUTOLOAD functionality When a WebFetch input object is passed to an output class, operations @@ -1693,12 +1571,9 @@ $name =~ s/.*://; # strip fully-qualified portion, just want function # decline all-caps names - reserved for special Perl functions - # if Perl core didn't handle this name, we won't mess with it either my ( $package, $filename, $line ) = caller; - if ( $name =~ /^[A-Z]+$/ ) { - throw_autoload_fail "reserved function $name declined " - ." - called by $package ($filename line $line)"; - } + ( $name =~ /^[A-Z]+$/ ) and return; + debug __PACKAGE__."::AUTOLOAD $name"; # check for function in caller package # (WebFetch may hand an input module's object to an output module) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |