|
From: <ik...@us...> - 2009-09-09 00:37:48
|
Revision: 56
http://webfetch.svn.sourceforge.net/webfetch/?rev=56&view=rev
Author: ikluft
Date: 2009-09-09 00:37:27 +0000 (Wed, 09 Sep 2009)
Log Message:
-----------
overhaul API
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/Atom.pm
branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
branches/v0.13/lib/WebFetch/Input/RSS.pm
branches/v0.13/lib/WebFetch/Input/SiteNews.pm
branches/v0.13/lib/WebFetch/Output/TT.pm
branches/v0.13/lib/WebFetch/Output/TWiki.pm
branches/v0.13/lib/WebFetch.pm
Added Paths:
-----------
branches/v0.13/lib/WebFetch/Data/
branches/v0.13/lib/WebFetch/Data/Record.pm
branches/v0.13/lib/WebFetch/Data/Store.pm
Added: branches/v0.13/lib/WebFetch/Data/Record.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Record.pm (rev 0)
+++ branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -0,0 +1,189 @@
+#
+# WebFetch::Data::Record - WebFetch Embedding API data record
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+
+package WebFetch::Data::Record;
+
+use strict;
+use warnings;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+ 'WebFetch::Data::Record::Exception::AutoloadFailure' => {
+ isa => 'WebFetch::TracedException',
+ alias => 'throw_autoload_fail',
+ description => "AUTOLOAD failed to handle function call",
+ },
+
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Record - Object for management of WebFetch data records/rows
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Record;
+
+WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
+$value = $obj-E<gt>bynum( $num );
+$value = $obj->fieldname;
+$obj->fieldname( $value );
+>
+
+=head1 DESCRIPTION
+
+This module provides read-only access to a single record of the WebFetch data.
+
+=cut
+
+our $AUTOLOAD;
+
+# initialization
+sub init
+{
+ my $self = shift;
+
+ # save parameters
+ $self->{obj} = shift;
+ $self->{num} = shift;
+ $self->{recref} = $self->{obj}{records}[$self->{num}];
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ # make accessor functions
+ my $field;
+ my $class = ref( $self );
+ foreach $field ( @{$self->{obj}{fields}}) {
+ $class->mk_field_accessor( $field );
+ }
+ foreach $field ( keys %{$self->{obj}{wk_names}}) {
+ $class->mk_field_accessor( $field );
+ }
+
+ return $self;
+}
+
+# shortcut function to top-level WebFetch object data
+sub data { return $_[0]->{obj}->data; }
+
+=item $value = $obj->bynum( $field_num );
+
+Returns the value of the field located by the field number provided.
+The first field is numbered 0.
+
+=cut
+
+# get a field by number
+sub bynum
+{
+ my $self = shift;
+ my $f = shift;
+ my $num = $self->{num};
+
+ return $self->{recref}[$f];
+}
+
+=item $class->mk_field_accessor( $field_name, ... );
+
+Creates accessor functions for each field name provided.
+
+=cut
+
+# make field accessor/mutator functions
+sub mk_field_accessor
+{
+ my $class = shift;
+ my $name;
+
+ foreach $name ( @_ ) {
+ no strict 'refs';
+ $class->can( $name ) and next; # skip if function exists!
+
+ # make a closure which keeps value of $name from this call
+ # keep generic so code can use more than one data type per run
+ *{$class."::".$name} = sub {
+ my $self = shift;
+ my $value = shift;
+ my $obj = $self->{obj};
+ my $recref = $self->{recref};
+ my $f;
+ if ( exists $obj->{findex}{$name}) {
+ $f = $obj->{findex}{$name};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } elsif ( exists $obj->{wk_names}{$name}) {
+ my $wk = $obj->{wk_names}{$name};
+ $f = $obj->{findex}{$wk};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } else {
+ return undef;
+ }
+ };
+ }
+}
+
+=item accessor functions
+
+Accessor functions are created for field names and
+well-known names as they are defined.
+
+So a field named "title" can be accessed by an object method of the same
+name, like $obj->title .
+
+=cut
+
+# AUTOLOAD function to provide field accessors/mutators
+sub AUTOLOAD
+{
+ my $self = shift;
+ my $type = ref($self) or throw_autoload_fail "self is not an object";
+
+ my $name = $AUTOLOAD;
+ $name =~ s/.*://; # strip fully-qualified portion, just want function
+
+ # decline all-caps names - reserved for special Perl functions
+ ( $name =~ /^[A-Z]+$/ ) and return;
+
+ WebFetch::debug __PACKAGE__."::AUTOLOAD $name";
+ if (( exists $self->{obj}{findex}{$name})
+ or ( exists $self->{obj}{wk_names}{$name}))
+ {
+ $type->mk_field_accessor( $name );
+ return $self->$name(@_);
+ } else {
+ throw_autoload_fail "no such function or field $name";
+ }
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Added: branches/v0.13/lib/WebFetch/Data/Store.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Store.pm (rev 0)
+++ branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -0,0 +1,356 @@
+#
+# WebFetch::Data::Store - WebFetch Embedding API top-level data store
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+# The WebFetch Embedding API manages the following data:
+# * {data} - top level hash container (WebFetch::Data::Store)
+# * {fields} - array of field names
+# * {records} - array of data records (WebFetch::Data::Record)
+# * each record is an array of data fields in the order of the field names
+# * {wk_names} - hash of WebFetch well-known fields to actual field names
+# * {feed} - top-level arbitrary info about the feed
+#
+
+package WebFetch::Data::Store;
+
+use strict;
+use warnings;
+use WebFetch;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Store - Object for management of WebFetch data
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Store;
+
+$data = webfetch_obj-E<gt>data;
+$data-E<gt>add_fields( "field1", "field2", ... );
+$num = $data-E<gt>num_fields;
+@field_names = $data-E<gt>get_fields;
+$name = $data-E<gt>field_bynum( 3 );
+$data-E<gt>add_wk_names( "title" =E<gt> "heading", "url" =E<gt> "link", ... );
+$value = $data-E<gt>get_feed( $name );
+$data-E<gt>set_feed( $name, $value );
+$data-E<gt>add_record( $field1, $field2, ... ); # order corresponds to add_fields
+$num = $data-E<gt>num_records;
+$record = $data-E<gt>get_record( $n );
+$data-E<gt>reset_pos;
+$record = $data-E<gt>next_record;
+$name = $data-E<gt>wk2fname( $wk_name );
+$num = $data-E<gt>fname2fnum( $field_name );
+$num = $data-E<gt>wk2fnum( $wk_name );
+>
+
+=head1 DESCRIPTION
+
+This module provides access to the WebFetch data.
+WebFetch instantiates the object for the input module.
+The input module uses this to construct the data set from its input.
+The output module uses the this to access the data and
+produce its output object/format.
+
+=cut
+
+# initialization
+sub init
+{
+ my $self = shift;
+ $self->{fields} = [];
+ $self->{findex} = {};
+ $self->{records} = [];
+ $self->{wk_names} = {};
+ $self->{wkindex} = {};
+ $self->{feed} = {};
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ return $self;
+}
+
+=item $obj->add_fields( "field1", "field2", ... );
+
+Add the field names in the order their values will appear in the data table.
+
+=cut
+
+# add field names
+sub add_fields
+{
+ my $self = shift;
+ my @fields = @_;
+ my $field;
+ foreach $field ( @fields ) {
+ $self->{findex}{$field} = scalar @{$self->{fields}};
+ push @{$self->{fields}}, $field;
+ }
+}
+
+=item $num = $obj->num_fields;
+
+Returns the number of fields/columns in the data.
+
+=cut
+
+# get number of fields
+sub num_fields
+{
+ my $self = shift;
+ return scalar @{$self->{fields}};
+}
+
+=item @field_names = $obj->get_fields;
+
+Gets a list of the field names in the order their values appear in the data
+table;
+
+=cut
+
+# get field names
+sub get_fields
+{
+ my $self = shift;
+ return keys %{$self->{fields}};
+}
+
+=item $field_name = $obj->field_bynum( $num );
+
+Return a field name string based on the numeric position of the field.
+
+=cut
+
+# get field name by number
+sub field_bynum
+{
+ my $self = shift;
+ my $num = shift;
+ return $self->{fields}[$num];
+}
+
+=item $obj->add_wk_names( "title" => "heading", "url" => "link", ... );
+
+Add associations between WebFetch well-known field names, which allows
+WebFetch to apply meaning to these fields, such as titles, dates and URLs.
+The parameters are pairs of well-known and actual field names.
+Running this function more than once will add to the existing associations
+of well-known to actual field names.
+
+=cut
+
+# add well-known names
+sub add_wk_names
+{
+ my $self = shift;
+ my ( $wk_name, $field );
+
+ while ( @_ >= 2 ) {
+ $wk_name = shift;
+ $field = shift;
+ WebFetch::debug "add_wk_names $wk_name => $field";
+ $self->{wk_names}{$wk_name} = $field;
+ $self->{wkindex}{$wk_name} = $self->{findex}{$field};
+ }
+}
+
+=item $value = $obj->get_feed( $name );
+
+Get an item of per-feed data by name.
+
+=cut
+
+# get feed info
+sub get_feed
+{
+ my $self = shift;
+ my $name = shift;
+ return (exists $self->{$name}) ? $self->{$name} : undef;
+}
+
+=item $obj->set_feed( $name, $value );
+
+Set an item of per-feed data by name and value.
+
+=cut
+
+# set feed info
+sub set_feed
+{
+ my $self = shift;
+ my $name = shift;
+ my $value = shift;
+ my $retval = (exists $self->{$name}) ? $self->{$name} : undef;
+ $self->{$name} = $value;
+ return $retval;
+}
+
+=item $obj->add_record( $value1, $value2, $value3, ... );
+
+Add a row to the end of the data table. Values must correspond to the
+positions of the field names that were provided earlier.
+
+=cut
+
+# add a data record
+# this adds the field values in the same order the field names were added
+sub add_record
+{
+ my $self = shift;
+ push @{$self->{records}}, [ @_ ];
+}
+
+# TODO: add a function add_record_unordered( name => value, ... )
+# less efficient, but may be OK for cases where that doesn't matter
+
+=item $num = $obj->num_records;
+
+Get the number of records/rows in the data table.
+
+=cut
+
+# get the number of data records
+sub num_records
+{
+ my $self = shift;
+ return scalar @{$self->{records}};
+}
+
+=item $record = get_record( $num );
+
+Returns a WebFetch::Data::Record object for the row located
+by the given row number in the data table. The first row is numbered 0.
+Calling this function does not affect the position used by the next_record
+function.
+
+=cut
+
+# get a data record by index
+sub get_record
+{
+ my $self = shift;
+ my $n = shift;
+ WebFetch::debug "get_record $n";
+ require WebFetch::Data::Record;
+ return WebFetch::Data::Record->new( $self, $n );
+}
+
+=item $obj->reset_pos;
+
+Reset the position counter used by the next_record function back to the
+beginning of the data table.
+
+=cut
+
+# reset iterator position
+sub reset_pos
+{
+ my $self = shift;
+
+ WebFetch::debug "reset_pos";
+ delete $self->{pos};
+}
+
+=item $record = $obj->next_record;
+
+The first call to this function returns the first record.
+Each successive call to this function returns the following record until
+the end of the data table.
+After the last record, the function returns undef until
+reset_pos is called to reset it back to the beginning.
+
+=cut
+
+# get next record
+sub next_record
+{
+ my $self = shift;
+
+ # initialize if necessary
+ if ( !exists $self->{pos}) {
+ $self->{pos} = 0;
+ }
+ WebFetch::debug "next_record n=".$self->{pos}." of "
+ .scalar @{$self->{records}};
+
+ # return undef if position is out of bounds
+ ( $self->{pos} < 0 ) and return undef;
+ ( $self->{pos} > scalar @{$self->{records}} - 1 ) and return undef;
+
+ # get record
+ return $self->get_record( $self->{pos}++ );
+}
+
+=item $obj->wk2fname( $wk )
+
+Obtain a field name from a well-known name.
+
+=cut
+
+# convert well-known name to field name
+sub wk2fname
+{
+ my ( $self, $wk ) = @_;
+
+ WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef");
+ return ( exists $self->{wk_names}{$wk})
+ ? $self->{wk_names}{$wk}
+ : undef;
+}
+
+=item $obj->fname2fnum( $fname )
+
+Obtain a field number from a field name.
+
+=cut
+
+# convert a field name to a field number
+sub fname2fnum
+{
+ my ( $self, $fname ) = @_;
+
+ WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" );
+ return ( exists $self->{findex}{$fname})
+ ? $self->{findex}{$fname}
+ : undef;
+}
+
+=item $obj->wk2fnum( $wk )
+
+Obtain a field number from a well-known name.
+
+=cut
+
+# convert well-known name to field number
+sub wk2fnum
+{
+ my ( $self, $wk ) = @_;
+
+ WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" );
+ return ( exists $self->{wkindex}{$wk})
+ ? $self->{wkindex}{$wk}
+ : undef;
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -80,21 +80,17 @@
my ( $self ) = @_;
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "id", "updated", "title", "author", "link",
- "summary", "content", "xml" ];
+ $self->data->add_fields( "id", "updated", "title", "author", "link",
+ "summary", "content", "xml" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"id" => "id",
"title" => "title",
"url" => "link",
"date" => "updated",
"summary" => "summary",
- };
- $self->{data}{records} = [];
+ );
- # process the links
-
# parse data file
$self->parse_input();
@@ -143,9 +139,8 @@
my $summary = extract_value( $entry->summary() );
my $content = extract_value( $entry->content() );
my $xml = $entry->as_xml();
- push @{$self->{data}{records}},
- [ $id, $updated, $title, $author, $link, $summary,
- $content, $xml ];
+ $self->data->add_record( $id, $updated, $title,
+ $author, $link, $summary, $content, $xml );
}
}
Modified: branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -44,45 +44,21 @@
if ( !defined $self->{content}) {
throw_nostruct "content struct does not exist";
}
- if ( ref($self->{content}) eq "HASH" ) {
+ if ( ref($self->{content})->isa( "WebFetch::Data::Store" )) {
+ $self->{data} = $self->{content};
+ return;
+ } elsif ( ref($self->{content}) eq "HASH" ) {
if (( exists $self->{content}{fields})
and ( exists $self->{content}{records})
and ( exists $self->{content}{wk_names}))
{
- # it's already in WebFetch Embedding data format
- $self->{data} = $self->{content};
+ $self->data->{fields} = $self->{content}{fields};
+ $self->data->{wk_names} = $self->{content}{wk_names};
+ $self->data->{records} = $self->{content}{records};
return;
}
}
- if ( ref($self->{content}) ne "ARRAY" ) {
- throw_badstruct "content is not an ARRAY ref "
- ."or WebFetch Embedding API data";
- }
-
- # build data structure
- $self->{data} = {};
- $self->{data}{fields} = [ sort keys %{$self->{content}[0]}];
- $self->{data}{records} = [];
- my ( $entry, $field );
- foreach $entry ( @{$self->{content}}) {
- my @new_entry;
- foreach $field ( @{$self->{data}{fields}}) {
- push @new_entry, ( exists $entry->{$field})
- ? $entry->{$field} : "";
- }
- push @{$self->{data}{records}}, \@new_entry;
- }
-
- # map the well-known field names
- if ( defined $self->{wk_names}) {
- $self->{data}{wk_names} = $self->{wk_names};
- } else {
- # if not provided, fake it the best we can
- $self->{data}{wk_names} = {};
- foreach $field ( @{$self->{data}{fields}}) {
- $self->{data}{wk_names}{$field} = $field;
- }
- }
+ throw_badstruct "content should be a WebFetch::Data::Store";
}
1;
Modified: branches/v0.13/lib/WebFetch/Input/RSS.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -54,18 +54,16 @@
my ( $self ) = @_;
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "pubDate", "title", "link", "category",
- "description" ];
+ $self->data->add_fields( "pubDate", "title", "link", "category",
+ "description" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "link",
"date" => "pubDate",
"summary" => "description",
"category" => "category",
- };
- $self->{data}{records} = [];
+ );
# parse data file
$self->parse_input();
@@ -163,8 +161,8 @@
? $item->{category} : "";
my $description = ( defined $item->{description})
? $item->{description} : "";
- push @{$self->{data}{records}},
- [ $pub_date, $title, $link, $category, $description ];
+ $self->data->add_record( $pub_date, $title, $link,
+ $category, $description );
$pos++;
}
}
Modified: branches/v0.13/lib/WebFetch/Input/SiteNews.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -84,19 +84,17 @@
}
# set up Webfetch Embedding API data
- $self->{data} = {};
$self->{actions} = {};
- $self->{data}{fields} = [ "date", "title", "priority", "expired",
- "position", "label", "url", "category", "text" ];
+ $self->data->add_fields( "date", "title", "priority", "expired",
+ "position", "label", "url", "category", "text" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "url",
"date" => "date",
"summary" => "text",
"category" => "category"
- };
- $self->{data}{records} = [];
+ );
# process the links
@@ -263,10 +261,10 @@
my $text = ( defined $item->{text}) ? $item->{text} : "";
my $url_prefix = ( defined $self->{url_prefix})
? $self->{url_prefix} : "";
- push @{$self->{data}{records}},
- [ printstamp($posted), $title, priority( $item ),
+ $self->data->add_record(
+ printstamp($posted), $title, priority( $item ),
expired( $item ), $pos, $label,
- $url_prefix."#".$label, $category, $text ];
+ $url_prefix."#".$label, $category, $text );
$pos++;
}
}
Modified: branches/v0.13/lib/WebFetch/Output/TT.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -80,7 +80,8 @@
my $template = Template->new( \%tt_config );
# process template
- $template->process( $self->{template}, $self->{data}, \$output )
+ $template->process( $self->{template}, { data => $self->{data}},
+ \$output, { binmode => ':utf8'} )
or throw_template $template->error();
$self->raw_savable( $filename, $output );
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -50,6 +50,11 @@
description => "WebFetch::Output::TWiki returned errors from "
."saving one or more entries",
},
+ "WebFetch::Output::TWiki::Exception::NoIdentifier" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_no_id_field",
+ description => "no identifier field could be found",
+ },
);
=head1 NAME
@@ -249,10 +254,13 @@
my $id_field = $self->wk2fnum( "id" );
if ( ! defined $id_field ) {
$id_field = $self->wk2fnum( "url" );
- } elsif ( ! defined $id_field ) {
+ }
+ if ( ! defined $id_field ) {
$id_field = $self->wk2fnum( "title" );
}
- WebFetch::debug "write_to_twiki: id_field=$id_field";
+ if ( ! defined $id_field ) {
+ throw_no_id_field "no usable identifier field was found";
+ }
# open DB file for tracking unique IDs of articles already processed
my %id_index;
@@ -272,13 +280,14 @@
# create topics with metadata from each WebFetch data record
my $entry;
my @oopses;
- foreach $entry ( @{$self->{data}{records}}) {
+ $self->data->reset_pos;
+ while ( $entry = $self->data->next_record ) {
# check that this entry hasn't already been forwarded to TWiki
- if ( exists $id_index{$entry->[$id_field]}) {
+ if ( exists $id_index{$entry->bynum( $id_field )}) {
next;
}
- $id_index{$entry->[$id_field]} = time;
+ $id_index{$entry->bynum( $id_field )} = time;
# select topic name
my $topicname = sprintf $tnum_format, $tnum_counter;
@@ -298,20 +307,21 @@
{ name => $config->{parent}});
$meta->put( "FORM", { name => $config->{form}});
my $fnum;
- for ( $fnum = 0; $fnum <= @{$self->{data}{fields}}; $fnum++ ) {
- WebFetch::debug "meta: ".$self->{data}{fields}[$fnum]
- ." = ".$entry->[$fnum];
- ( defined $self->{data}{fields}[$fnum]) or next;
- ( $self->{data}{fields}[$fnum] eq "xml") and next;
- ( defined $entry->[$fnum]) or next;
+ for ( $fnum = 0; $fnum <= $self->data->num_fields; $fnum++ ) {
+ WebFetch::debug "meta: "
+ .$self->data->field_bynum($fnum)
+ ." = ".$entry->bynum($fnum);
+ ( defined $self->data->field_bynum($fnum)) or next;
+ ( $self->data->field_bynum($fnum) eq "xml") and next;
+ ( defined $entry->bynum($fnum)) or next;
WebFetch::debug "meta: OK";
$meta->putKeyed( "FIELD", {
- name => $self->{data}{fields}[$fnum],
- value => $entry->[$fnum] });
+ name => $self->data->field_bynum($fnum),
+ value => $entry->bynum($fnum)});
}
# save a special title field for TWiki indexes
- my $index_title = $entry->[$self->wk2fnum( "title" )];
+ my $index_title = $entry->title;
$index_title =~ s/[\t\r\n\|]+/ /gs;
$index_title =~ s/^\s*//;
$index_title =~ s/\s*$//;
@@ -329,8 +339,8 @@
$topicname, $meta, $text );
if ( $oopsurl ) {
WebFetch::debug "write_to_twiki: $topicname - $oopsurl";
- push @oopses, $entry->[$self->wk2fnum( "title" ) or 0]
- ." -> ".$topicname." ".$oopsurl;
+ push @oopses, $entry->title." -> "
+ .$topicname." ".$oopsurl;
}
}
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -100,12 +100,10 @@
use strict;
-use Carp;
use Getopt::Long;
use LWP::UserAgent;
use HTTP::Request;
use Date::Calc;
-use Data::Dumper;
# define exceptions/errors
use Exception::Class (
@@ -114,6 +112,12 @@
isa => 'WebFetch::Exception',
},
+ 'WebFetch::Exception::DataWrongType' => {
+ isa => 'WebFetch::TracedException',
+ alias => 'throw_data_wrongtype',
+ description => "provided data must be a WebFetch::Data::Store",
+ },
+
'WebFetch::Exception::GetoptError' => {
isa => 'WebFetch::Exception',
alias => 'throw_getopt_error',
@@ -174,16 +178,16 @@
description => "no module was found to run the request",
},
- 'WebFetch::Exception::AutoRunFailure' => {
+ 'WebFetch::Exception::AutoloadFailure' => {
isa => 'WebFetch::TracedException',
alias => 'throw_autoload_fail',
- description => "AUTORUN failed to handle function call",
+ description => "AUTOLOAD failed to handle function call",
},
);
# initialize class variables
-our $VERSION = '0.13-pre29';
+our $VERSION = '0.13-pre39';
our %default_modules = (
"input" => {
"rss" => "WebFetch::Input::RSS",
@@ -558,7 +562,16 @@
# go fetch the data
# this function must be provided by a derived module
+ # non-fetching modules (i.e. data) must define $self->{no_fetch}=1
if (( ! exists $self->{no_fetch}) or ! $self->{no_fetch}) {
+ require WebFetch::Data::Store;
+ if ( exists $self->{data}) {
+ $self->{data}->isa( "WebFetch::Data::Store" )
+ or throw_data_wrongtype "object data must be "
+ ."a WebFetch::Data::Store";
+ } else {
+ $self->{data} = WebFetch::Data::Store->new();
+ }
$self->fetch();
}
@@ -990,7 +1003,7 @@
foreach $entry ( @{$self->{actions}{$action_spec}}) {
# parameters must be in an ARRAY ref
if (ref $entry ne "ARRAY" ) {
- carp "warning: entry in action spec "
+ warn "warning: entry in action spec "
."\"".$action_spec."\""
."expected to be ARRAY, found "
.(ref $entry)." instead "
@@ -1007,7 +1020,7 @@
# it will be reported by $self->save()
}
} else {
- carp "warning: action \"$action_spec\" specified but "
+ warn "warning: action \"$action_spec\" specified but "
."\&{\$self->$action_handler}() "
."not defined in "
.(ref $self)." - ignored\n";
@@ -1325,7 +1338,6 @@
if ( $self->{debug} ) {
print STDERR "entering save()\n";
- #Dumper($self);
}
# check if we have attributes needed to proceed
@@ -1344,10 +1356,11 @@
if (( exists $self->{fetch_urls}) and $self->{fetch_urls}) {
my $url_fnum = $self->wk2fnum( "url" );
my $entry;
- foreach $entry ( @{$self->{data}{records}}) {
- if ( defined $entry->[$url_fnum]) {
- $self->direct_fetch_savable(
- $entry->[$url_fnum]);
+ $self->data->reset_pos;
+ while ( $entry = $self->data->next_record()) {
+ my $url = $entry->url;
+ if ( defined $url ) {
+ $self->direct_fetch_savable( $entry->url );
}
}
}
@@ -1356,8 +1369,8 @@
my $savable;
foreach $savable ( @{$self->{savable}}) {
- if ( $self->{debug} ) {
- print STDERR "saving ".$savable->{file}."\n";
+ if ( exists $savable->{file}) {
+ debug "saving ".$savable->{file}."\n";
}
# an output module may have handled a more intricate operation
@@ -1431,9 +1444,8 @@
}
# write content to the "new content" file
- if ( ! open ( new_content, ">$new_content" )) {
- $savable->{error} = "cannot open "
- .$new_content.": $!";
+ if ( ! open ( new_content, ">:utf8", "$new_content" )) {
+ $savable->{error} = "cannot open $new_content: $!";
next;
}
if ( !print new_content $savable->{content}) {
@@ -1529,148 +1541,14 @@
}
#
-# functions to support format handlers
+# shortcuts to data object functions
#
-# initialize an internal hash of field names to field numbers
-sub init_fname2fnum
-{
- my ( $self ) = @_;
+sub data { my $self = shift; return $self->{data}; }
+sub wk2fname { my $self = shift; return $self->data->wk2fname( @_ )};
+sub fname2fnum { my $self = shift; return $self->data->fname2fnum( @_ )};
+sub wk2fnum { my $self = shift; return $self->data->wk2fnum( @_ )};
- # check if fname2fnum is already initialized
- if (( exists $self->{fname2fnum})
- and ref $self->{fname2fnum} eq "HASH" )
- {
- # already done - success
- return 1;
- }
-
- # check if prerequisite data exists
- if (( ! exists $self->{data} )
- or ( ! exists $self->{data}{fields}))
- {
- # missing prerequisites - failed
- return 0;
- }
-
- # initialize the fname2fnum hash
- my $i;
- $self->{fname2fnum} = {};
- for ( $i=0; $i < scalar(@{$self->{data}{fields}}); $i++ ) {
- # put the field number in as the value for the hash
- $self->{fname2fnum}{$self->{data}{fields}[$i]} = $i;
- }
-
- # OK, done
- return 1;
-}
-
-# initialize an internal hash of well-known names to field numbers
-sub init_wk2fnum
-{
- my ( $self ) = @_;
-
- $self->init_fname2fnum() or return 0;
-
- # check if wk2fnum is already initialized
- if (( exists $self->{wk2fnum})
- and ref $self->{wk2fnum} eq "HASH" )
- {
- # already done - success
- return 1;
- }
-
- # check for prerequisite data
- if ( ! exists $self->{data}{wk_names}) {
- return 0;
- }
-
- my $wk_key;
- $self->{wk2fnum} = {};
- foreach $wk_key ( keys %{$self->{data}{wk_names}}) {
- # perform consistency cross-check between wk_names and fields
- if ( !exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}})
- {
- # wk_names has a bad field name - carp about it!
- carp "warning: wk_names contains $wk_key"."->"
- .$self->{data}{wk_names}{$wk_key}
- ." but "
- .$self->{data}{wk_names}{$wk_key}
- ." is not in the fields list - ignored\n";
- } else {
- # it's OK - put it in the table
- $self->{wk2fnum}{$wk_key} =
- $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}};
- }
- }
- return 1;
-}
-
-=item $obj->wk2fname( $wk )
-
-=cut
-
-# convert well-known name to field name
-sub wk2fname
-{
- my ( $self, $wk ) = @_;
-
- $self->init_fname2fnum() or return undef;
-
- # check for prerequisite data
- if (( ! exists $self->{data}{wk_names})
- or ( ! exists $self->{data}{wk_names}{$wk}))
- {
- return undef;
- }
-
- # double check that the field exists before pronouncing it OK
- # (perform consistency cross-check between wk_names and fields)
- if ( exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk}}) {
- return $self->{data}{wk_names}{$wk};
- }
-
- # otherwise, wk_names has a bad field name.
- # But init_wk2fnum() may have already carped about it
- # so check whether we need to carp about it or not.
- if ( ! exists $self->{wk2fnum}) {
- carp "warning: wk_names contains $wk"."->"
- .$self->{data}{wk_names}{$wk}
- ." but "
- .$self->{data}{wk_names}{$wk}
- ." is not in the fields list - ignored\n";
- }
- return undef;
-}
-
-=item $obj->fname2fnum( $fname )
-
-=cut
-
-# convert a field name to a field number
-sub fname2fnum
-{
- my ( $self, $fname ) = @_;
-
- $self->init_fname2fnum() or return undef;
- return ( exists $self->{fname2fnum}{$fname})
- ? $self->{fname2fnum}{$fname} : undef;
-}
-
-=item $obj->wk2fnum( $wk )
-
-=cut
-
-# convert well-known name to field number
-sub wk2fnum
-{
- my ( $self, $wk ) = @_;
-
- $self->init_wk2fnum() or return undef;
- return ( exists $self->{wk2fnum}{$wk})
- ? $self->{wk2fnum}{$wk} : undef;
-}
-
=item AUTOLOAD functionality
When a WebFetch input object is passed to an output class, operations
@@ -1693,12 +1571,9 @@
$name =~ s/.*://; # strip fully-qualified portion, just want function
# decline all-caps names - reserved for special Perl functions
- # if Perl core didn't handle this name, we won't mess with it either
my ( $package, $filename, $line ) = caller;
- if ( $name =~ /^[A-Z]+$/ ) {
- throw_autoload_fail "reserved function $name declined "
- ." - called by $package ($filename line $line)";
- }
+ ( $name =~ /^[A-Z]+$/ ) and return;
+ debug __PACKAGE__."::AUTOLOAD $name";
# check for function in caller package
# (WebFetch may hand an input module's object to an output module)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|