You can subscribe to this list here.
| 2001 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
|
Dec
|
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
|
Feb
(1) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(12) |
Sep
(25) |
Oct
(1) |
Nov
|
Dec
|
|
From: <ik...@us...> - 2009-10-02 23:16:16
|
Revision: 70
http://webfetch.svn.sourceforge.net/webfetch/?rev=70&view=rev
Author: ikluft
Date: 2009-10-02 23:16:06 +0000 (Fri, 02 Oct 2009)
Log Message:
-----------
make pod-coverage tests pass for WebFetch::Output::TWiki
Modified Paths:
--------------
branches/v0.14/lib/WebFetch/Output/TWiki.pm
branches/v0.14/t/pod-coverage.t
Modified: branches/v0.14/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.14/lib/WebFetch/Output/TWiki.pm 2009-09-28 23:51:58 UTC (rev 69)
+++ branches/v0.14/lib/WebFetch/Output/TWiki.pm 2009-10-02 23:16:06 UTC (rev 70)
@@ -490,6 +490,12 @@
}
}
+=item $obj->fmt_handler_twiki( $filename )
+
+This function outputs the data to the metadata on a page on a TWiki system.
+
+=cut
+
# TWiki format handler
sub fmt_handler_twiki
{
Modified: branches/v0.14/t/pod-coverage.t
===================================================================
--- branches/v0.14/t/pod-coverage.t 2009-09-28 23:51:58 UTC (rev 69)
+++ branches/v0.14/t/pod-coverage.t 2009-10-02 23:16:06 UTC (rev 70)
@@ -25,7 +25,7 @@
"WebFetch::Data::Record" => [ qw( new init data ) ],
"WebFetch::Output::TT" => [ qw( new fetch ) ],
"WebFetch::Output::Dump" => [ qw( new fetch ) ],
- "WebFetch::Output::TWiki" => [ qw( new fetch ) ],
+ "WebFetch::Output::TWiki" => [ qw( new fetch get_twiki_config write_to_twiki write_to_twiki_metadata write_to_twiki_topics ) ],
"WebFetch::Input::RSS" => [ qw( new fetch extract_value parse_input
parse_rss printstamp ) ],
"WebFetch::Input::Atom" => [ qw( new fetch extract_value
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-28 23:52:08
|
Revision: 69
http://webfetch.svn.sourceforge.net/webfetch/?rev=69&view=rev
Author: ikluft
Date: 2009-09-28 23:51:58 +0000 (Mon, 28 Sep 2009)
Log Message:
-----------
new branch 0.14
Added Paths:
-----------
branches/v0.14/
Property changes on: branches/v0.14
___________________________________________________________________
Added: svn:mergeinfo
+ /branches/v0.13:44-67
/trunk:38
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-21 04:59:30
|
Revision: 68
http://webfetch.svn.sourceforge.net/webfetch/?rev=68&view=rev
Author: ikluft
Date: 2009-09-21 04:59:20 +0000 (Mon, 21 Sep 2009)
Log Message:
-----------
merge 0.13 to trunk
Modified Paths:
--------------
trunk/MANIFEST
trunk/lib/WebFetch/Input/Atom.pm
trunk/lib/WebFetch/Input/PerlStruct.pm
trunk/lib/WebFetch/Input/RSS.pm
trunk/lib/WebFetch/Input/SiteNews.pm
trunk/lib/WebFetch/Output/Dump.pm
trunk/lib/WebFetch/Output/TT.pm
trunk/lib/WebFetch.pm
trunk/t/00-load.t
trunk/t/pod-coverage.t
Added Paths:
-----------
trunk/lib/WebFetch/Data/
trunk/lib/WebFetch/Data/Record.pm
trunk/lib/WebFetch/Data/Store.pm
trunk/lib/WebFetch/Output/TWiki.pm
Removed Paths:
-------------
trunk/lib/WebFetch/Data/Record.pm
trunk/lib/WebFetch/Data/Store.pm
Property Changed:
----------------
trunk/
Property changes on: trunk
___________________________________________________________________
Modified: svn:mergeinfo
- /trunk:38
+ /branches/v0.13:44-67
/trunk:38
Modified: trunk/MANIFEST
===================================================================
--- trunk/MANIFEST 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/MANIFEST 2009-09-21 04:59:20 UTC (rev 68)
@@ -11,3 +11,10 @@
lib/WebFetch/Input/SiteNews.pm
lib/WebFetch/Output/Dump.pm
lib/WebFetch/Output/TT.pm
+lib/WebFetch/Output/TWiki.pm
+lib/WebFetch/Data/Store.pm
+lib/WebFetch/Data/Record.pm
+t/00-load.t
+t/boilerplate.t
+t/pod-coverage.t
+t/pod.t
Deleted: trunk/lib/WebFetch/Data/Record.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Data/Record.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -1,212 +0,0 @@
-#
-# WebFetch::Data::Record - WebFetch Embedding API data record
-#
-# Copyright (c) 2009 Ian Kluft. This program is free software; you can
-# redistribute it and/or modify it under the terms of the GNU General Public
-# License Version 3. See http://www.webfetch.org/GPLv3.txt
-#
-
-package WebFetch::Data::Record;
-
-use strict;
-use warnings;
-use base qw( WebFetch );
-
-# define exceptions/errors
-use Exception::Class (
- 'WebFetch::Data::Record::Exception::AutoloadFailure' => {
- isa => 'WebFetch::TracedException',
- alias => 'throw_autoload_fail',
- description => "AUTOLOAD failed to handle function call",
- },
-
-);
-
-# no user-servicable parts beyond this point
-
-=head1 NAME
-
-WebFetch::Data::Record - Object for management of WebFetch data records/rows
-
-=head1 SYNOPSIS
-
-C<use WebFetch::Data::Record;>
-
-C<WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
-$value = $obj-E<gt>bynum( $num );
-$value = $obj->fieldname;
-$obj->fieldname( $value );
->
-
-=head1 DESCRIPTION
-
-This module provides read-only access to a single record of the WebFetch data.
-
-=cut
-
-our $AUTOLOAD;
-
-# initialization
-sub init
-{
- my $self = shift;
-
- # save parameters
- $self->{obj} = shift;
- $self->{num} = shift;
- $self->{recref} = $self->{obj}{records}[$self->{num}];
-
- # signal WebFetch that Data subclasses do not provide a fetch function
- $self->{no_fetch} = 1;
- $self->SUPER::init( @_ );
-
- # make accessor functions
- my $field;
- my $class = ref( $self );
- foreach $field ( @{$self->{obj}{fields}}) {
- $class->mk_field_accessor( $field );
- }
- foreach $field ( keys %{$self->{obj}{wk_names}}) {
- $class->mk_field_accessor( $field );
- }
-
- return $self;
-}
-
-# shortcut function to top-level WebFetch object data
-sub data { return $_[0]->{obj}; }
-
-=item $value = $obj->bynum( $field_num );
-
-Returns the value of the field located by the field number provided.
-The first field is numbered 0.
-
-=cut
-
-# get a field by number
-sub bynum
-{
- my $self = shift;
- my $f = shift;
-
- WebFetch::debug "bynum $f";
- return $self->{recref}[$f];
-}
-
-=item $value = $obj->byname( $field_name );
-
-Returns the value of the named field.
-
-=cut
-
-# get a field by name
-sub byname
-{
- my $self = shift;
- my $fname = shift;
- my $obj = $self->{obj};
- my $f;
-
- WebFetch::debug "byname ".(( defined $fname ) ? $fname : "undef");
- ( defined $fname ) or return undef;
- if ( exists $obj->{findex}{$fname}) {
- $f = $obj->{findex}{$fname};
- return $self->{recref}[$f];
- }
- return undef;
-}
-
-=item $class->mk_field_accessor( $field_name, ... );
-
-Creates accessor functions for each field name provided.
-
-=cut
-
-# make field accessor/mutator functions
-sub mk_field_accessor
-{
- my $class = shift;
- my $name;
-
- foreach $name ( @_ ) {
- no strict 'refs';
- $class->can( $name ) and next; # skip if function exists!
-
- # make a closure which keeps value of $name from this call
- # keep generic so code can use more than one data type per run
- *{$class."::".$name} = sub {
- my $self = shift;
- my $value = shift;
- my $obj = $self->{obj};
- my $recref = $self->{recref};
- my $f;
- if ( exists $obj->{findex}{$name}) {
- $f = $obj->{findex}{$name};
- if ( defined $value ) {
- my $tmp = $recref->[$f];
- $recref->[$f] = $value;
- return $tmp;
- } else {
- return $recref->[$f];
- }
- } elsif ( exists $obj->{wk_names}{$name}) {
- my $wk = $obj->{wk_names}{$name};
- $f = $obj->{findex}{$wk};
- if ( defined $value ) {
- my $tmp = $recref->[$f];
- $recref->[$f] = $value;
- return $tmp;
- } else {
- return $recref->[$f];
- }
- } else {
- return undef;
- }
- };
- }
-}
-
-=item accessor functions
-
-Accessor functions are created for field names and
-well-known names as they are defined.
-
-So a field named "title" can be accessed by an object method of the same
-name, like $obj->title .
-
-=cut
-
-# AUTOLOAD function to provide field accessors/mutators
-sub AUTOLOAD
-{
- my $self = shift;
- my $type = ref($self) or throw_autoload_fail "self is not an object";
-
- my $name = $AUTOLOAD;
- $name =~ s/.*://; # strip fully-qualified portion, just want function
-
- # decline all-caps names - reserved for special Perl functions
- ( $name =~ /^[A-Z]+$/ ) and return;
-
- WebFetch::debug __PACKAGE__."::AUTOLOAD $name";
- if (( exists $self->{obj}{findex}{$name})
- or ( exists $self->{obj}{wk_names}{$name}))
- {
- $type->mk_field_accessor( $name );
- return $self->$name(@_);
- } else {
- throw_autoload_fail "no such function or field $name";
- }
-}
-
-1;
-__END__
-=head1 AUTHOR
-
-WebFetch was written by Ian Kluft
-Send patches, bug reports, suggestions and questions to
-C<ma...@we...>.
-
-=head1 SEE ALSO
-
-L<WebFetch>, L<WebFetch::Data::Record>
Copied: trunk/lib/WebFetch/Data/Record.pm (from rev 67, branches/v0.13/lib/WebFetch/Data/Record.pm)
===================================================================
--- trunk/lib/WebFetch/Data/Record.pm (rev 0)
+++ trunk/lib/WebFetch/Data/Record.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -0,0 +1,212 @@
+#
+# WebFetch::Data::Record - WebFetch Embedding API data record
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+
+package WebFetch::Data::Record;
+
+use strict;
+use warnings;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+ 'WebFetch::Data::Record::Exception::AutoloadFailure' => {
+ isa => 'WebFetch::TracedException',
+ alias => 'throw_autoload_fail',
+ description => "AUTOLOAD failed to handle function call",
+ },
+
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Record - Object for management of WebFetch data records/rows
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Record;>
+
+C<WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
+$value = $obj-E<gt>bynum( $num );
+$value = $obj->fieldname;
+$obj->fieldname( $value );
+>
+
+=head1 DESCRIPTION
+
+This module provides read-only access to a single record of the WebFetch data.
+
+=cut
+
+our $AUTOLOAD;
+
+# initialization
+sub init
+{
+ my $self = shift;
+
+ # save parameters
+ $self->{obj} = shift;
+ $self->{num} = shift;
+ $self->{recref} = $self->{obj}{records}[$self->{num}];
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ # make accessor functions
+ my $field;
+ my $class = ref( $self );
+ foreach $field ( @{$self->{obj}{fields}}) {
+ $class->mk_field_accessor( $field );
+ }
+ foreach $field ( keys %{$self->{obj}{wk_names}}) {
+ $class->mk_field_accessor( $field );
+ }
+
+ return $self;
+}
+
+# shortcut function to top-level WebFetch object data
+sub data { return $_[0]->{obj}; }
+
+=item $value = $obj->bynum( $field_num );
+
+Returns the value of the field located by the field number provided.
+The first field is numbered 0.
+
+=cut
+
+# get a field by number
+sub bynum
+{
+ my $self = shift;
+ my $f = shift;
+
+ WebFetch::debug "bynum $f";
+ return $self->{recref}[$f];
+}
+
+=item $value = $obj->byname( $field_name );
+
+Returns the value of the named field.
+
+=cut
+
+# get a field by name
+sub byname
+{
+ my $self = shift;
+ my $fname = shift;
+ my $obj = $self->{obj};
+ my $f;
+
+ WebFetch::debug "byname ".(( defined $fname ) ? $fname : "undef");
+ ( defined $fname ) or return undef;
+ if ( exists $obj->{findex}{$fname}) {
+ $f = $obj->{findex}{$fname};
+ return $self->{recref}[$f];
+ }
+ return undef;
+}
+
+=item $class->mk_field_accessor( $field_name, ... );
+
+Creates accessor functions for each field name provided.
+
+=cut
+
+# make field accessor/mutator functions
+sub mk_field_accessor
+{
+ my $class = shift;
+ my $name;
+
+ foreach $name ( @_ ) {
+ no strict 'refs';
+ $class->can( $name ) and next; # skip if function exists!
+
+ # make a closure which keeps value of $name from this call
+ # keep generic so code can use more than one data type per run
+ *{$class."::".$name} = sub {
+ my $self = shift;
+ my $value = shift;
+ my $obj = $self->{obj};
+ my $recref = $self->{recref};
+ my $f;
+ if ( exists $obj->{findex}{$name}) {
+ $f = $obj->{findex}{$name};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } elsif ( exists $obj->{wk_names}{$name}) {
+ my $wk = $obj->{wk_names}{$name};
+ $f = $obj->{findex}{$wk};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } else {
+ return undef;
+ }
+ };
+ }
+}
+
+=item accessor functions
+
+Accessor functions are created for field names and
+well-known names as they are defined.
+
+So a field named "title" can be accessed by an object method of the same
+name, like $obj->title .
+
+=cut
+
+# AUTOLOAD function to provide field accessors/mutators
+sub AUTOLOAD
+{
+ my $self = shift;
+ my $type = ref($self) or throw_autoload_fail "self is not an object";
+
+ my $name = $AUTOLOAD;
+ $name =~ s/.*://; # strip fully-qualified portion, just want function
+
+ # decline all-caps names - reserved for special Perl functions
+ ( $name =~ /^[A-Z]+$/ ) and return;
+
+ WebFetch::debug __PACKAGE__."::AUTOLOAD $name";
+ if (( exists $self->{obj}{findex}{$name})
+ or ( exists $self->{obj}{wk_names}{$name}))
+ {
+ $type->mk_field_accessor( $name );
+ return $self->$name(@_);
+ } else {
+ throw_autoload_fail "no such function or field $name";
+ }
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Deleted: trunk/lib/WebFetch/Data/Store.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Data/Store.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -1,359 +0,0 @@
-#
-# WebFetch::Data::Store - WebFetch Embedding API top-level data store
-#
-# Copyright (c) 2009 Ian Kluft. This program is free software; you can
-# redistribute it and/or modify it under the terms of the GNU General Public
-# License Version 3. See http://www.webfetch.org/GPLv3.txt
-#
-# The WebFetch Embedding API manages the following data:
-# * {data} - top level hash container (WebFetch::Data::Store)
-# * {fields} - array of field names
-# * {records} - array of data records (WebFetch::Data::Record)
-# * each record is an array of data fields in the order of the field names
-# * {wk_names} - hash of WebFetch well-known fields to actual field names
-# * {feed} - top-level arbitrary info about the feed
-#
-
-package WebFetch::Data::Store;
-
-use strict;
-use warnings;
-use WebFetch;
-use base qw( WebFetch );
-
-# define exceptions/errors
-use Exception::Class (
-);
-
-# no user-servicable parts beyond this point
-
-=head1 NAME
-
-WebFetch::Data::Store - Object for management of WebFetch data
-
-=head1 SYNOPSIS
-
-C<use WebFetch::Data::Store;>
-
-C<$data = webfetch_obj-E<gt>data;
-$data-E<gt>add_fields( "field1", "field2", ... );
-$num = $data-E<gt>num_fields;
-@field_names = $data-E<gt>get_fields;
-$name = $data-E<gt>field_bynum( 3 );
-$data-E<gt>add_wk_names( "title" =E<gt> "heading", "url" =E<gt> "link", ... );
-$value = $data-E<gt>get_feed( $name );
-$data-E<gt>set_feed( $name, $value );
-$data-E<gt>add_record( $field1, $field2, ... ); # order corresponds to add_fields
-$num = $data-E<gt>num_records;
-$record = $data-E<gt>get_record( $n );
-$data-E<gt>reset_pos;
-$record = $data-E<gt>next_record;
-$name = $data-E<gt>wk2fname( $wk_name );
-$num = $data-E<gt>fname2fnum( $field_name );
-$num = $data-E<gt>wk2fnum( $wk_name );
->
-
-=head1 DESCRIPTION
-
-This module provides access to the WebFetch data.
-WebFetch instantiates the object for the input module.
-The input module uses this to construct the data set from its input.
-The output module uses the this to access the data and
-produce its output object/format.
-
-=cut
-
-# initialization
-sub init
-{
- my $self = shift;
- $self->{fields} = [];
- $self->{findex} = {};
- $self->{records} = [];
- $self->{wk_names} = {};
- $self->{wkindex} = {};
- $self->{feed} = {};
-
- # signal WebFetch that Data subclasses do not provide a fetch function
- $self->{no_fetch} = 1;
- $self->SUPER::init( @_ );
-
- return $self;
-}
-
-=item $obj->add_fields( "field1", "field2", ... );
-
-Add the field names in the order their values will appear in the data table.
-
-=cut
-
-# add field names
-sub add_fields
-{
- my $self = shift;
- my @fields = @_;
- my $field;
- foreach $field ( @fields ) {
- $self->{findex}{$field} = scalar @{$self->{fields}};
- push @{$self->{fields}}, $field;
- }
-}
-
-=item $num = $obj->num_fields;
-
-Returns the number of fields/columns in the data.
-
-=cut
-
-# get number of fields
-sub num_fields
-{
- my $self = shift;
- return scalar @{$self->{fields}};
-}
-
-=item @field_names = $obj->get_fields;
-
-Gets a list of the field names in the order their values appear in the data
-table;
-
-=cut
-
-# get field names
-sub get_fields
-{
- my $self = shift;
- return keys %{$self->{fields}};
-}
-
-=item $field_name = $obj->field_bynum( $num );
-
-Return a field name string based on the numeric position of the field.
-
-=cut
-
-# get field name by number
-sub field_bynum
-{
- my $self = shift;
- my $num = shift;
- return $self->{fields}[$num];
-}
-
-=item $obj->add_wk_names( "title" => "heading", "url" => "link", ... );
-
-Add associations between WebFetch well-known field names, which allows
-WebFetch to apply meaning to these fields, such as titles, dates and URLs.
-The parameters are pairs of well-known and actual field names.
-Running this function more than once will add to the existing associations
-of well-known to actual field names.
-
-=cut
-
-# add well-known names
-sub add_wk_names
-{
- my $self = shift;
- my ( $wk_name, $field );
-
- while ( @_ >= 2 ) {
- $wk_name = shift;
- $field = shift;
- WebFetch::debug "add_wk_names $wk_name => $field";
- $self->{wk_names}{$wk_name} = $field;
- $self->{wkindex}{$wk_name} = $self->{findex}{$field};
- }
-}
-
-=item $value = $obj->get_feed( $name );
-
-Get an item of per-feed data by name.
-
-=cut
-
-# get feed info
-sub get_feed
-{
- my $self = shift;
- my $name = shift;
- return (exists $self->{$name}) ? $self->{$name} : undef;
-}
-
-=item $obj->set_feed( $name, $value );
-
-Set an item of per-feed data by name and value.
-
-=cut
-
-# set feed info
-sub set_feed
-{
- my $self = shift;
- my $name = shift;
- my $value = shift;
- my $retval = (exists $self->{$name}) ? $self->{$name} : undef;
- $self->{$name} = $value;
- return $retval;
-}
-
-=item $obj->add_record( $value1, $value2, $value3, ... );
-
-Add a row to the end of the data table. Values must correspond to the
-positions of the field names that were provided earlier.
-
-=cut
-
-# add a data record
-# this adds the field values in the same order the field names were added
-sub add_record
-{
- my $self = shift;
- push @{$self->{records}}, [ @_ ];
-}
-
-# TODO: add a function add_record_unordered( name => value, ... )
-# less efficient, but may be OK for cases where that doesn't matter
-
-=item $num = $obj->num_records;
-
-Get the number of records/rows in the data table.
-
-=cut
-
-# get the number of data records
-sub num_records
-{
- my $self = shift;
- return scalar @{$self->{records}};
-}
-
-=item $record = get_record( $num );
-
-Returns a WebFetch::Data::Record object for the row located
-by the given row number in the data table. The first row is numbered 0.
-Calling this function does not affect the position used by the next_record
-function.
-
-=cut
-
-# get a data record by index
-sub get_record
-{
- my $self = shift;
- my $n = shift;
- WebFetch::debug "get_record $n";
- require WebFetch::Data::Record;
- return WebFetch::Data::Record->new( $self, $n );
-}
-
-=item $obj->reset_pos;
-
-Reset the position counter used by the next_record function back to the
-beginning of the data table.
-
-=cut
-
-# reset iterator position
-sub reset_pos
-{
- my $self = shift;
-
- WebFetch::debug "reset_pos";
- delete $self->{pos};
-}
-
-=item $record = $obj->next_record;
-
-The first call to this function returns the first record.
-Each successive call to this function returns the following record until
-the end of the data table.
-After the last record, the function returns undef until
-reset_pos is called to reset it back to the beginning.
-
-=cut
-
-# get next record
-sub next_record
-{
- my $self = shift;
-
- # initialize if necessary
- if ( !exists $self->{pos}) {
- $self->{pos} = 0;
- }
- WebFetch::debug "next_record n=".$self->{pos}." of "
- .scalar @{$self->{records}};
-
- # return undef if position is out of bounds
- ( $self->{pos} < 0 ) and return undef;
- ( $self->{pos} > scalar @{$self->{records}} - 1 ) and return undef;
-
- # get record
- return $self->get_record( $self->{pos}++ );
-}
-
-=item $obj->wk2fname( $wk )
-
-Obtain a field name from a well-known name.
-
-=cut
-
-# convert well-known name to field name
-sub wk2fname
-{
- my $self = shift;
- my $wk = shift;
-
- WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef");
- return ( exists $self->{wk_names}{$wk})
- ? $self->{wk_names}{$wk}
- : undef;
-}
-
-=item $obj->fname2fnum( $fname )
-
-Obtain a field number from a field name.
-
-=cut
-
-# convert a field name to a field number
-sub fname2fnum
-{
- my $self = shift;
- my $fname = shift;
-
- WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" );
- return ( exists $self->{findex}{$fname})
- ? $self->{findex}{$fname}
- : undef;
-}
-
-=item $obj->wk2fnum( $wk )
-
-Obtain a field number from a well-known name.
-
-=cut
-
-# convert well-known name to field number
-sub wk2fnum
-{
- my $self = shift;
- my $wk = shift;
-
- WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" );
- return ( exists $self->{wkindex}{$wk})
- ? $self->{wkindex}{$wk}
- : undef;
-}
-
-1;
-__END__
-=head1 AUTHOR
-
-WebFetch was written by Ian Kluft
-Send patches, bug reports, suggestions and questions to
-C<ma...@we...>.
-
-=head1 SEE ALSO
-
-L<WebFetch>, L<WebFetch::Data::Record>
Copied: trunk/lib/WebFetch/Data/Store.pm (from rev 67, branches/v0.13/lib/WebFetch/Data/Store.pm)
===================================================================
--- trunk/lib/WebFetch/Data/Store.pm (rev 0)
+++ trunk/lib/WebFetch/Data/Store.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -0,0 +1,359 @@
+#
+# WebFetch::Data::Store - WebFetch Embedding API top-level data store
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+# The WebFetch Embedding API manages the following data:
+# * {data} - top level hash container (WebFetch::Data::Store)
+# * {fields} - array of field names
+# * {records} - array of data records (WebFetch::Data::Record)
+# * each record is an array of data fields in the order of the field names
+# * {wk_names} - hash of WebFetch well-known fields to actual field names
+# * {feed} - top-level arbitrary info about the feed
+#
+
+package WebFetch::Data::Store;
+
+use strict;
+use warnings;
+use WebFetch;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Store - Object for management of WebFetch data
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Store;>
+
+C<$data = webfetch_obj-E<gt>data;
+$data-E<gt>add_fields( "field1", "field2", ... );
+$num = $data-E<gt>num_fields;
+@field_names = $data-E<gt>get_fields;
+$name = $data-E<gt>field_bynum( 3 );
+$data-E<gt>add_wk_names( "title" =E<gt> "heading", "url" =E<gt> "link", ... );
+$value = $data-E<gt>get_feed( $name );
+$data-E<gt>set_feed( $name, $value );
+$data-E<gt>add_record( $field1, $field2, ... ); # order corresponds to add_fields
+$num = $data-E<gt>num_records;
+$record = $data-E<gt>get_record( $n );
+$data-E<gt>reset_pos;
+$record = $data-E<gt>next_record;
+$name = $data-E<gt>wk2fname( $wk_name );
+$num = $data-E<gt>fname2fnum( $field_name );
+$num = $data-E<gt>wk2fnum( $wk_name );
+>
+
+=head1 DESCRIPTION
+
+This module provides access to the WebFetch data.
+WebFetch instantiates the object for the input module.
+The input module uses this to construct the data set from its input.
+The output module uses the this to access the data and
+produce its output object/format.
+
+=cut
+
+# initialization
+sub init
+{
+ my $self = shift;
+ $self->{fields} = [];
+ $self->{findex} = {};
+ $self->{records} = [];
+ $self->{wk_names} = {};
+ $self->{wkindex} = {};
+ $self->{feed} = {};
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ return $self;
+}
+
+=item $obj->add_fields( "field1", "field2", ... );
+
+Add the field names in the order their values will appear in the data table.
+
+=cut
+
+# add field names
+sub add_fields
+{
+ my $self = shift;
+ my @fields = @_;
+ my $field;
+ foreach $field ( @fields ) {
+ $self->{findex}{$field} = scalar @{$self->{fields}};
+ push @{$self->{fields}}, $field;
+ }
+}
+
+=item $num = $obj->num_fields;
+
+Returns the number of fields/columns in the data.
+
+=cut
+
+# get number of fields
+sub num_fields
+{
+ my $self = shift;
+ return scalar @{$self->{fields}};
+}
+
+=item @field_names = $obj->get_fields;
+
+Gets a list of the field names in the order their values appear in the data
+table;
+
+=cut
+
+# get field names
+sub get_fields
+{
+ my $self = shift;
+ return keys %{$self->{fields}};
+}
+
+=item $field_name = $obj->field_bynum( $num );
+
+Return a field name string based on the numeric position of the field.
+
+=cut
+
+# get field name by number
+sub field_bynum
+{
+ my $self = shift;
+ my $num = shift;
+ return $self->{fields}[$num];
+}
+
+=item $obj->add_wk_names( "title" => "heading", "url" => "link", ... );
+
+Add associations between WebFetch well-known field names, which allows
+WebFetch to apply meaning to these fields, such as titles, dates and URLs.
+The parameters are pairs of well-known and actual field names.
+Running this function more than once will add to the existing associations
+of well-known to actual field names.
+
+=cut
+
+# add well-known names
+sub add_wk_names
+{
+ my $self = shift;
+ my ( $wk_name, $field );
+
+ while ( @_ >= 2 ) {
+ $wk_name = shift;
+ $field = shift;
+ WebFetch::debug "add_wk_names $wk_name => $field";
+ $self->{wk_names}{$wk_name} = $field;
+ $self->{wkindex}{$wk_name} = $self->{findex}{$field};
+ }
+}
+
+=item $value = $obj->get_feed( $name );
+
+Get an item of per-feed data by name.
+
+=cut
+
+# get feed info
+sub get_feed
+{
+ my $self = shift;
+ my $name = shift;
+ return (exists $self->{$name}) ? $self->{$name} : undef;
+}
+
+=item $obj->set_feed( $name, $value );
+
+Set an item of per-feed data by name and value.
+
+=cut
+
+# set feed info
+sub set_feed
+{
+ my $self = shift;
+ my $name = shift;
+ my $value = shift;
+ my $retval = (exists $self->{$name}) ? $self->{$name} : undef;
+ $self->{$name} = $value;
+ return $retval;
+}
+
+=item $obj->add_record( $value1, $value2, $value3, ... );
+
+Add a row to the end of the data table. Values must correspond to the
+positions of the field names that were provided earlier.
+
+=cut
+
+# add a data record
+# this adds the field values in the same order the field names were added
+sub add_record
+{
+ my $self = shift;
+ push @{$self->{records}}, [ @_ ];
+}
+
+# TODO: add a function add_record_unordered( name => value, ... )
+# less efficient, but may be OK for cases where that doesn't matter
+
+=item $num = $obj->num_records;
+
+Get the number of records/rows in the data table.
+
+=cut
+
+# get the number of data records
+sub num_records
+{
+ my $self = shift;
+ return scalar @{$self->{records}};
+}
+
+=item $record = get_record( $num );
+
+Returns a WebFetch::Data::Record object for the row located
+by the given row number in the data table. The first row is numbered 0.
+Calling this function does not affect the position used by the next_record
+function.
+
+=cut
+
+# get a data record by index
+sub get_record
+{
+ my $self = shift;
+ my $n = shift;
+ WebFetch::debug "get_record $n";
+ require WebFetch::Data::Record;
+ return WebFetch::Data::Record->new( $self, $n );
+}
+
+=item $obj->reset_pos;
+
+Reset the position counter used by the next_record function back to the
+beginning of the data table.
+
+=cut
+
+# reset iterator position
+sub reset_pos
+{
+ my $self = shift;
+
+ WebFetch::debug "reset_pos";
+ delete $self->{pos};
+}
+
+=item $record = $obj->next_record;
+
+The first call to this function returns the first record.
+Each successive call to this function returns the following record until
+the end of the data table.
+After the last record, the function returns undef until
+reset_pos is called to reset it back to the beginning.
+
+=cut
+
+# get next record
+sub next_record
+{
+ my $self = shift;
+
+ # initialize if necessary
+ if ( !exists $self->{pos}) {
+ $self->{pos} = 0;
+ }
+ WebFetch::debug "next_record n=".$self->{pos}." of "
+ .scalar @{$self->{records}};
+
+ # return undef if position is out of bounds
+ ( $self->{pos} < 0 ) and return undef;
+ ( $self->{pos} > scalar @{$self->{records}} - 1 ) and return undef;
+
+ # get record
+ return $self->get_record( $self->{pos}++ );
+}
+
+=item $obj->wk2fname( $wk )
+
+Obtain a field name from a well-known name.
+
+=cut
+
+# convert well-known name to field name
+sub wk2fname
+{
+ my $self = shift;
+ my $wk = shift;
+
+ WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef");
+ return ( exists $self->{wk_names}{$wk})
+ ? $self->{wk_names}{$wk}
+ : undef;
+}
+
+=item $obj->fname2fnum( $fname )
+
+Obtain a field number from a field name.
+
+=cut
+
+# convert a field name to a field number
+sub fname2fnum
+{
+ my $self = shift;
+ my $fname = shift;
+
+ WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" );
+ return ( exists $self->{findex}{$fname})
+ ? $self->{findex}{$fname}
+ : undef;
+}
+
+=item $obj->wk2fnum( $wk )
+
+Obtain a field number from a well-known name.
+
+=cut
+
+# convert well-known name to field number
+sub wk2fnum
+{
+ my $self = shift;
+ my $wk = shift;
+
+ WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" );
+ return ( exists $self->{wkindex}{$wk})
+ ? $self->{wkindex}{$wk}
+ : undef;
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Modified: trunk/lib/WebFetch/Input/Atom.pm
===================================================================
--- trunk/lib/WebFetch/Input/Atom.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Input/Atom.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -19,12 +19,56 @@
use Exception::Class (
);
+=head1 NAME
+
+WebFetch::Input::Atom - WebFetch input from Atom feeds
+
+=head1 SYNOPSIS
+
+This is an input module for WebFetch which accesses an Atom feed.
+The --source parameter contains the URL of the feed.
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
+ --source atom-feed-url [...WebFetch output options...]>
+
+In perl scripts:
+
+ use WebFetch::Input::Atom;
+
+ my $obj = WebFetch->new(
+ "dir" => "/path/to/fetch/workspace",
+ "source" => "http://search.twitter.com/search.atom?q=%23twiki",
+ "source_format" => "atom",
+ "dest" => "dump",
+ "dest_format" = "/path/to/dump/file",
+ );
+ $obj->do_actions; # process output
+ $obj->save; # save results
+
+
+=head1 DESCRIPTION
+
+This module gets the current headlines from a site-local file.
+
+The I<--input> parameter specifies a file name which contains news to be
+posted. See L<"FILE FORMAT"> below for details on contents to put in the
+file. I<--input> may be specified more than once, allowing a single news
+output to come from more than one input. For example, one file could be
+manually maintained in CVS or RCS and another could be entered from a
+web form.
+
+After this runs, the file C<site_news.html> will be created or replaced.
+If there already was a C<site_news.html> file, it will be moved to
+C<Osite_news.html>.
+
+=cut
+
+
our @Options = ();
our $Usage = "";
-# configuration parameters
-our $num_links = 5;
-
# no user-servicable parts beyond this point
# register capabilities with WebFetch
@@ -35,30 +79,18 @@
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- if ( !defined $self->{num_links}) {
- $self->{num_links} = $WebFetch::Input::Atom::num_links;
- }
- if ( !defined $self->{style}) {
- $self->{style} = {};
- $self->{style}{para} = 1;
- }
-
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "id", "updated", "title", "author", "link",
- "summary", "content", "xml" ];
+ $self->data->add_fields( "id", "updated", "title", "author", "link",
+ "summary", "content", "xml" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
+ "id" => "id",
"title" => "title",
"url" => "link",
"date" => "updated",
"summary" => "summary",
- };
- $self->{data}{records} = [];
+ );
- # process the links
-
# parse data file
$self->parse_input();
@@ -101,15 +133,15 @@
# save the data record
my $id = extract_value( $entry->id() );
my $title = extract_value( $entry->title() );
- my $author = extract_value( $entry->author->name );
+ my $author = ( defined $entry->author )
+ ? extract_value( $entry->author->name ) : "";
my $link = extract_value( $entry->link->href );
my $updated = extract_value( $entry->updated() );
my $summary = extract_value( $entry->summary() );
my $content = extract_value( $entry->content() );
my $xml = $entry->as_xml();
- push @{$self->{data}{records}},
- [ $id, $updated, $title, $author, $link, $summary,
- $content, $xml ];
+ $self->data->add_record( $id, $updated, $title,
+ $author, $link, $summary, $content, $xml );
}
}
@@ -117,36 +149,6 @@
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::Atom - download and save an Atom feed
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::Atom;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
- --source atom-feed-url [...WebFetch output options...]>
-
-=head1 DESCRIPTION
-
-This module gets the current headlines from a site-local file.
-
-The I<--input> parameter specifies a file name which contains news to be
-posted. See L<"FILE FORMAT"> below for details on contents to put in the
-file. I<--input> may be specified more than once, allowing a single news
-output to come from more than one input. For example, one file could be
-manually maintained in CVS or RCS and another could be entered from a
-web form.
-
-After this runs, the file C<site_news.html> will be created or replaced.
-If there already was a C<site_news.html> file, it will be moved to
-C<Osite_news.html>.
-
=head1 Atom FORMAT
Atom is an XML format defined at http://atompub.org/rfc4287.html
Modified: trunk/lib/WebFetch/Input/PerlStruct.pm
===================================================================
--- trunk/lib/WebFetch/Input/PerlStruct.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Input/PerlStruct.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -1,4 +1,3 @@
-#
# WebFetch::Input::PerlStruct.pm
# push a Perl structure with pre-parsed news into WebFetch
#
@@ -11,15 +10,26 @@
use strict;
use base "WebFetch";
-use Carp;
+# define exceptions/errors
+use Exception::Class (
+ "WebFetch::Input::PerlStruct::Exception::NoStruct" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_nostruct",
+ description => "no 'content' structure was provided",
+ },
-our $format;
-our @Options = ( "format:s" );
+ "WebFetch::Input::PerlStruct::Exception::BadStruct" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_badstruct",
+ description => "content of 'content' was not recognizable",
+ },
+
+);
+
+our @Options = ( );
our $Usage = "";
# configuration parameters
-our $num_links = 5;
-our $default_format = "<a href=\"%url%\">%title%</a>";
# no user-servicable parts beyond this point
@@ -30,40 +40,27 @@
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- $self->{num_links} = $WebFetch::Input::PerlStruct::num_links;
- if ( defined $format ) {
- $self->{"format"} = $format;
- } else {
- $self->{"format"} = $WebFetch::Input::PerlStruct::default_format;
- }
-
# get the content from the provided perl structure
if ( !defined $self->{content}) {
- croak "WebFetch::Input::PerlStruct: content struct does not exist\n";
+ throw_nostruct "content struct does not exist";
}
- if ( ref($self->{content}) != "ARRAY" ) {
- croak "WebFetch::Input::PerlStruct: content is not an ARRAY ref\n";
- }
-
- # collate $self->{content} into @content_links by fields from format
- my ( @content_links, $part );
- my @fields = ( $self->{"format"} =~ /%([^%]*)%/go );
- foreach $part ( @{$self->{content}} ) {
- my ( $fname, $subparts );
- $subparts= [];
- foreach $fname ( @fields ) {
- push @$subparts, "".((defined $part->{$fname})
- ? $part->{$fname} : "" );
+ if ( ref($self->{content})->isa( "WebFetch::Data::Store" )) {
+ $self->{data} = $self->{content};
+ return;
+ } elsif ( ref($self->{content}) eq "HASH" ) {
+ if (( exists $self->{content}{fields})
+ and ( exists $self->{content}{records})
+ and ( exists $self->{content}{wk_names}))
+ {
+ $self->data->{fields} = $self->{content}{fields};
+ $self->data->{wk_names} = $self->{content}{wk_names};
+ $self->data->{records} = $self->{content}{records};
+ return;
}
- push ( @content_links, $subparts );
}
-
- # build data structure
- $self->{data} = {};
+ throw_badstruct "content should be a WebFetch::Data::Store";
}
-
1;
__END__
# POD docs follow
@@ -81,11 +78,8 @@
C<$obj = new WebFetch::Input::PerlStruct (
"content" => content_struct,
"dir" => output_dir,
- "file" => output_file,
- [ "format" => format_string, ]
- [ "export" => wf_export_filename, ]
- [ "font_size" => font_size, ]
- [ "font_face" => font_face, ]
+ "dest" => output_file,
+ "dest_format" => output_format, # used to select WebFetch output module
[ "group" => file_group_id, ]
[ "mode" => file_mode_perms, ]
[ "quiet" => 1 ]);>
@@ -116,55 +110,26 @@
=head1 THE CONTENT STRUCTURE
-The $content_struct parameter must be a reference to an array of hashes.
+The $content_struct parameter may be in either of two formats.
+
+If $content_struct is a hash reference containing entries called
+"fields", "wk_names" and "records", then it is assumed to be already
+in the format of the "data" element of the WebFetch Embedding API.
+
+Otherwise, it must be a reference to an array of hashes.
Each of the hashes represents a separate news item,
in the order they should be displayed.
-The fields of each has entry must provide enough information to
-match field names in all the the output formats you're using.
-Output formats include the following:
-=over 4
+The field names should be consistent through all records.
+WebFetch uses the field names from the first record and assumes the
+remainder are identical.
-=item HTML output file
-
-All the fields used in the $format_string (see below) must be present
-for generation of the HTML output.
-
-=item WebFetch export
-
-The $format_string also determines the fields that will be used
-for WebFetch export.
-Note that the WebFetch::General module expects by default to find
-fields called "url" and "title".
-So if you use something different from the default,
-you must provide your format string in the instructions
-for sites that fetch news from you.
-(Otherwise their WebFetch::General won't be looking for the fields
-you're providing.)
-
-=item MyNetscape export
-
-The MyNetscape export function expects to find fields called
-"title" and "url", and will skip any hash entry which is
-missing either of them.
-
-=back
-
-=head1 FORMAT STRINGS
-
-WebFetch::Input::PerlStruct uses a format string identical to WebFetch::General.
-The default format for retrieved data is
-
-<a href="%url%">%title%</a>
-
-See the WebFetch::General documentation for more details.
-
The names of the fields are chosen by the calling function.
-Though for the convenience of the user,
-the author of an exporting module should keep in mind the
-default WebFetch::Input::PerlStruct format uses fields called "url" and "title".
-If you use fields by different names, make sure your code provides those
-fields in the $content_struct parameter.
+If an array called "wk_names" is provided then it used to map
+well-known field names of the WebFetch Embedding API to field names in
+this data.
+Otherwise, meaning can only be applied to field names if they already
+match WebFetch's well-known field names.
=head1 AUTHOR
Modified: trunk/lib/WebFetch/Input/RSS.pm
===================================================================
--- trunk/lib/WebFetch/Input/RSS.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Input/RSS.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -19,47 +19,52 @@
use Exception::Class (
);
+=head1 NAME
+
+WebFetch::Input::RSS - download and save an RSS feed
+
+=cut
+
our @Options = ();
our $Usage = "";
# configuration parameters
-our $num_links = 5;
# no user-servicable parts beyond this point
# register capabilities with WebFetch
__PACKAGE__->module_register( "input:rss" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Input::RSS;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::RSS -e "&fetch_main" -- --dir directory
+ --source rss-feed-url [...WebFetch output options...]>
+
+=cut
+
# called from WebFetch main routine
sub fetch
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- if ( !defined $self->{num_links}) {
- $self->{num_links} = $WebFetch::Input::RSS::num_links;
- }
- if ( !defined $self->{style}) {
- $self->{style} = {};
- $self->{style}{para} = 1;
- }
-
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "pubDate", "title", "link", "category",
- "description" ];
+ $self->data->add_fields( "pubDate", "title", "link", "category",
+ "description" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "link",
"date" => "pubDate",
"summary" => "description",
"category" => "category",
- };
- $self->{data}{records} = [];
+ );
- # process the links
-
# parse data file
$self->parse_input();
@@ -156,48 +161,16 @@
? $item->{category} : "";
my $description = ( defined $item->{description})
? $item->{description} : "";
- push @{$self->{data}{records}},
- [ $pub_date, $title, $link, $category, $description ];
+ $self->data->add_record( $pub_date, $title, $link,
+ $category, $description );
$pos++;
}
}
-#---------------------------------------------------------------------------
-
-#
-# utility functions
-#
-
-# generate a printable version of the datestamp
-sub printstamp
-{
- my ( $stamp ) = @_;
- my ( $year, $mon, $day ) = ( $stamp =~ /^(....)(..)(..)/ );
-
- return Month_to_Text(int($mon))." ".int($day).", $year";
-}
-
-#---------------------------------------------------------------------------
-
1;
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::RSS - download and save an RSS feed
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::RSS;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::RSS -e "&fetch_main" -- --dir directory
- --source rss-feed-url [...WebFetch output options...]>
-
=head1 DESCRIPTION
This module gets the current headlines from a site-local file.
Modified: trunk/lib/WebFetch/Input/SiteNews.pm
===================================================================
--- trunk/lib/WebFetch/Input/SiteNews.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Input/SiteNews.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -13,6 +13,12 @@
use Carp;
use Date::Calc qw(Today Delta_Days Month_to_Text);
+=head1 NAME
+
+WebFetch::Input::SiteNews - download and save SiteNews headlines
+
+=cut
+
# set defaults
our ( $cat_priorities, $now, $nowstamp );
@@ -30,6 +36,35 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "cmdline", "input:sitenews" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Input::SiteNews;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory
+ --source news-file --short short-form-output-file
+ --long long-form-output-file>
+
+=head1 DESCRIPTION
+
+This module gets the current headlines from a site-local file.
+
+The I<--source> parameter specifies a file name which contains news to be
+posted. See L<"FILE FORMAT"> below for details on contents to put in the
+file. I<--source> may be specified more than once, allowing a single news
+output to come from more than one input. For example, one file could be
+manually maintained in CVS or RCS and another could be entered from a
+web form.
+
+After this runs, the file C<site_news.html> will be created or replaced.
+If there already was a C<site_news.html> file, it will be moved to
+C<Osite_news.html>.
+
+=cut
+
# constants for state names
sub initial_state { 0; }
sub attr_state { 1; }
@@ -49,19 +84,17 @@
}
# set up Webfetch Embedding API data
- $self->{data} = {};
$self->{actions} = {};
- $self->{data}{fields} = [ "date", "title", "priority", "expired",
- "position", "label", "url", "category", "text" ];
+ $self->data->add_fields( "date", "title", "priority", "expired",
+ "position", "label", "url", "category", "text" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "url",
"date" => "date",
"summary" => "text",
"category" => "category"
- };
- $self->{data}{records} = [];
+ );
# process the links
@@ -228,83 +261,14 @@
my $text = ( defined $item->{text}) ? $item->{text} : "";
my $url_prefix = ( defined $self->{url_prefix})
? $self->{url_prefix} : "";
- push @{$self->{data}{records}},
- [ printstamp($posted), $title, priority( $item ),
+ $self->data->add_record(
+ printstamp($posted), $title, priority( $item ),
expired( $item ), $pos, $label,
- $url_prefix."#".$label, $category, $text ];
+ $url_prefix."#".$label, $category, $text );
$pos++;
}
}
-# format handler function specific to this module's long-news output format
-sub fmt_handler_sitenews_long
-{
- my ( $self, $filename ) = @_;
-
- # sort events for long display
- my @long_news = sort {
- # sort news entries for long display
- # sorting priority:
- # date first
- # category/priority second
- # reverse file order last
-
- # sort by date
- my $lbl_fnum = $self->fname2fnum("label");
- my ( $a_date, $b_date) = ( $a->[$lbl_fnum], $b->[$lbl_fnum]);
- $a_date =~ s/-.*//;
- $b_date =~ s/-.*//;
- if ( $a_date ne $b_date ) {
- return $b_date cmp $a_date;
- }
-
- # sort by priority (within same date)
- my $pri_fnum = $self->fname2fnum("priority");
- if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) {
- return $a->[$pri_fnum] <=> $b->[$pri_fnum];
- }
-
- # sort by chronological order (within same date and priority)
- return $a->[$lbl_fnum] cmp $b->[$lbl_fnum];
- } @{$self->{data}{records}};
-
- # process the links for the long list
- my ( @long_text, $prev, $url_prefix, $i );
- $url_prefix = ( defined $self->{url_prefix})
- ? $self->{url_prefix}
- : "";
- $prev=undef;
- push @long_text, "<dl>";
- my $lbl_fnum = $self->fname2fnum("label");
- my $date_fnum = $self->fname2fnum("date");
- my $title_fnum = $self->fname2fnum("title");
- my $txt_fnum = $self->fname2fnum("text");
- my $exp_fnum = $self->fname2fnum("expired");
- my $pri_fnum = $self->fname2fnum("priority");
- for ( $i = 0; $i <= $#long_news; $i++ ) {
- my $news = $long_news[$i];
- if (( ! defined $prev->[$date_fnum]) or
- $prev->[$date_fnum] ne $news->[$date_fnum])
- {
- push @long_text, "<dt>".$news->[$date_fnum];
- push @long_text, "<dd>";
- }
- push @long_text, "<a name=\"".$news->[$lbl_fnum]."\">"
- .$news->[$txt_fnum]."</a>\n"
- ."<!--- priority: ".$news->[$pri_fnum]
- .($news->[$exp_fnum] ? " expired" : "")
- ." --->";
- push @long_text, "<p>";
- $prev = $news;
- }
- push @long_text, "</dl>";
-
- # store it for later save to disk
- $self->html_savable( $self->{long_path}, join("\n",@long_text)."\n" );
-}
-
-#---------------------------------------------------------------------------
-
#
# utility functions
#
@@ -351,43 +315,10 @@
}
}
-#---------------------------------------------------------------------------
-
1;
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::SiteNews - download and save SiteNews headlines
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::SiteNews;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory
- --source news-file --short short-form-output-file
- --long long-form-output-file>
-
-=head1 DESCRIPTION
-
-This module gets the current headlines from a site-local file.
-
-The I<--source> parameter specifies a file name which contains news to be
-posted. See L<"FILE FORMAT"> below for details on contents to put in the
-file. I<--source> may be specified more than once, allowing a single news
-output to come from more than one input. For example, one file could be
-manually maintained in CVS or RCS and another could be entered from a
-web form.
-
-After this runs, the file C<site_news.html> will be created or replaced.
-If there already was a C<site_news.html> file, it will be moved to
-C<Osite_news.html>.
-
=head1 FILE FORMAT
The WebFetch::Input::SiteNews data format is used to set up news for the
Modified: trunk/lib/WebFetch/Output/Dump.pm
===================================================================
--- trunk/lib/WebFetch/Output/Dump.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Output/Dump.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -20,7 +20,12 @@
use Exception::Class (
);
+=head1 NAME
+WebFetch::Output::Dump - save data in a Perl structure dump
+
+=cut
+
# set defaults
our ( @url, $cat_priorities, $now, $nowstamp );
@@ -35,23 +40,6 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "output:dump" );
-# Perl structure dump format handler
-sub fmt_handler_dump
-{
- my ( $self, $filename ) = @_;
-
- $self->raw_savable( $filename, Dumper( $self->{data}));
- 1;
-}
-
-1;
-__END__
-# POD docs follow
-
-=head1 NAME
-
-WebFetch::Output::Dump - save data in a Perl structure dump
-
=head1 SYNOPSIS
In perl scripts:
@@ -65,10 +53,30 @@
=head1 DESCRIPTION
-This module gets the current news headlines from a site-local file.
+This is an output module for WebFetch which simply outputs a Perl
+structure dump from C<Data::Dumper>. It can be read again by a Perl
+script using C<eval>.
-TODO: add description
+=item $obj->fmt_handler_dump( $filename )
+This function dumps the data into a string for saving by the WebFetch::save()
+function.
+
+=cut
+
+# Perl structure dump format handler
+sub fmt_handler_dump
+{
+ my ( $self, $filename ) = @_;
+
+ $self->raw_savable( $filename, Dumper( $self->{data}));
+ 1;
+}
+
+1;
+__END__
+# POD docs follow
+
=head1 AUTHOR
WebFetch was written by Ian Kluft
Modified: trunk/lib/WebFetch/Output/TT.pm
===================================================================
--- trunk/lib/WebFetch/Output/TT.pm 2009-09-21 04:37:45 UTC (rev 67)
+++ trunk/lib/WebFetch/Output/TT.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -23,7 +23,12 @@
);
+=head1 NAME
+WebFetch::Output::TT - save data via the Perl Template Toolkit
+
+=cut
+
# set defaults
our @Options = ( "template=s", "tt_include:s" );
@@ -34,6 +39,29 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "cmdline", "output:tt" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Output::TT;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Output::TT -e "&fetch_main" --
+ [...WebFetch input options...] --dir directory
+ --dest_format tt --dest dest-path --template tt-file >
+
+=head1 DESCRIPTION
+
+This module saves output via the Perl Template Toolkit.
+
+=item $obj->fmt_handler_tt( $filename )
+
+This function formats the data according to the Perl Template Toolkit
+template provided in the --template parameter.
+
+=cut
+
# Perl Template Toolkit format handler
sub fmt_handler_tt
{
@@ -52,11 +80,10 @@
my $template = Template->new( \%tt_config );
# process template
- my $result = $template->process( $self->{template}, $self->{data},
- \$output );
+ $template->process( $self->{template}, { data => $self->{data}},
+ \$output, { binmode => ':utf8'} )
+ or throw_template $template->error();
- $result or throw_template ( $template->error());
-
$self->raw_savable( $filename, $output );
1;
}
@@ -65,27 +92,6 @@
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Output::TT - save data via the Perl Template Toolkit
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Output::TT;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Output::TT -e "&fetch_main" -- --dir directory
- --dest_format tt --dest dest-path [...WebFetch output options...]>
-
-=head1 DESCRIPTION
-
-This module saves output via the Perl Template Toolkit.
-
-TODO: add description
-
=head1 AUTHOR
WebFetch was written by Ian Kluft
@@ -95,12 +101,13 @@
=head1 SEE ALSO
=for html
-<a href="WebFetch.html">WebFetch</a>
+<a href="WebFetch.html">WebFetch</a>,
+<a href="http://www.template-toolkit.org/>Perl Template Toolkit</a>
=for text
-WebFetch
+WebFetch, Perl Template Toolkit
=for man
-WebFetch
+WebFetch, Perl Template Toolkit
=cut
Copied: trunk/lib/WebFetch/Output/TWiki.pm (from rev 67, branches/v0.13/lib/WebFetch/Output/TWiki.pm)
===================================================================
--- trunk/lib/WebFetch/Output/TWiki.pm (rev 0)
+++ trunk/lib/WebFetch/Output/TWiki.pm 2009-09-21 04:59:20 UTC (rev 68)
@@ -0,0 +1,548 @@
+#
+# WebFetch::Output::TWiki - save data into a TWiki web site
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+
+package WebFetch::Output::TWiki;
+
+use warnings;
+use strict;
+use WebFetch;
+use base "WebFetch";
+use DB_File;
+
+# define exceptions/errors
+use Exception::Class (
+ "WebFetch::Output::TWiki::Exception::NoRoot" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_no_root",
+ description => "WebFetch::Output::TWiki needs to be provided "
+ ."a twiki_root parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::NotFound" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_not_found",
+ description => "the directory in the twiki_root parameter "
+ ."doesn't exist or doesn't have a lib subdirectory",
+ },
+ "WebFetch::Output::TWiki::Exception::Require" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_require",
+ description => "failed to import TWiki or TWiki::Func modules",
+ },
+ "WebFetch::Output::TWiki::Exception::NoConfig" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_no_config",
+ description => "WebFetch::Output::TWiki needs to be provided "
+ ."a config_topic parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::ConfigMissing" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_config_missing",
+ description => "WebFetch::Output::TWiki is missing a required "
+ ."configuration parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::Oops" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_oops",
+ description => "WebFetch::Output::TWiki returned errors from "
+ ."saving one or more entries",
+ },
+ "WebFetch::Output::TWiki::Exception::FieldNotSpecified" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_field_not_specified",
+ description => "a required field was not defined or found",
+ },
+);
+
+=head1 NAME
+
+WebFetch::Output::TWiki - WebFetch output to TWiki web site
+
+=cut
+
+# globals/defaults
+our @Options = ( "twiki_root=s", "config_topic=s", "config_key=s" );
+our $Usage = "--twiki_root path-to-twiki --config_topic web.topic "
+ ."--config_key keyword";
+our @default_field_names = ( qw( key web parent prefix template form
+ options ));
+
+# no user-servicable parts beyond this point
+
+# register capabilities with WebFetch
+__PACKAGE__->module_register( "cmdline", "output:twiki" );
+
+=head1 SYNOPSIS
+
+This is an output module for WebFetch which places the data in pages
+on a TWiki web site. Some of its configuration information is read from
+a TWiki page. Calling or command-line parameters point to the TWiki page
+which has the configuration and a search key to locate the correct line
+in a table.
+
+From the command line...
+
+ perl -w -I$libdir -MWebFetch::Input::Atom -MWebFetch::Output::TWiki -e "&fetch_main" -- --dir "/path/to/fetch/worskspace" --source "http://search.twitter.com/search.atom?q=%23twiki" --dest=twiki --twiki_root=/var/www/twiki --config_topic=Feeds.WebFetchConfig --config_key=twiki
+
+From Perl code...
+
+ use WebFetch;
+
+ my $obj = WebFetch->new(
+ "dir" => "/path/to/fetch/workspace",
+ "source" => "http://search.twitter.com/search.atom?q=%23twiki",
+ "source_format" => "atom",
+ "dest" => "twiki",
+ "dest_format" = "twiki",
+ "twiki_root" => "/var/www/twiki",
+ "config_topic" => "Feeds.WebFetchConfig",
+ "config_key" => "twiki",
+ );
+ $obj->do_actions; # process output
+ $obj->save; # save results
+
+=head1 configuration from TWiki topic
+
+The configuration information on feeds is kept in a TWiki page. You can
+specify any page with a web and topic name, for example C<--config_topic=Feeds.WebFetchConfig> .
+
+The contents of that configuration page could look like this, though with
+any feeds you want to configure. The "Key" field matches the --config_key
+command-line parameter, and then brings in the rest of the configuration
+info from that line. An example is shown below.
+
+=over
+C<< ---+ !WebFetch Configuration >>
+
+C<< The following table is used by !WebFetch to configure news feeds >>
+
+C<< %STARTINCLUDE% >>
+C<< | *Key* | *Web* | *Parent* | *Prefix* | *Template* | *Form* | *Options* | *Modul >>
+e* | *Source* |
+C<< | ikluft-twitter | Feeds | TwitterIkluftFeed | TwitterIkluft | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://twitter.com/statuses/user_timeline/37786023.rss | >>
+C<< | twiki-twitter | Feeds | TwitterTwikiFeed | TwitterTwiki | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://search.twitter.com/search.atom?q=%23twiki | >>
+C<< | cnn | Feeds | RssCnn | RssCnn | RssFeedTemplate | RssFeedForm | separate_topics | RSS | http://rss.cnn.com/rss/cnn_topstories.rss | >>
+C<< %STOPINCLUDE% >>
+=back
+
+The C<%STARTINCLUDE%> and C<%STOPINCLUDE%> are not required. However, if
+present, they are used as boundaries for the inclusion like in a normal
+INCLUDE operation on TWiki.
+
+=cut
+
+# read the TWiki configuation
+sub get_twiki_config
+{
+ my $self = shift;
+ WebFetch::debug "in get_twiki_config";
+
+ # find the TWiki modules
+ if ( ! exists $self->{twiki_root}) {
+ throw_twiki_no_root( "TWiki root directory not defined" );
+ }
+ if (( ! -d $self->{twiki_root}) or ( ! -d $self->{twiki_root}."/lib" ))
+ {
+ throw_twiki_not_found( "can't find TWiki root or lib at "
+ .$self->{twiki_root});
+ }
+
+ # load the TWiki modules
+ WebFetch::debug "loading TWiki modules";
+ push @INC, $self->{twiki_root}."/lib";
+ eval { require TWiki; require TWiki::Func; };
+ if ( $@ ) {
+ throw_twiki_require ( $@ );
+ }
+
+ # initiate TWiki library, create session as user "WebFetch"
+ $self->{twiki_obj} = TWiki->new( "WebFetch" );
+
+ # get the contents of the TWiki topic which contains our configuration
+ if ( !exists $self->{config_topic}) {
+ throw_twiki_no_config( "TWiki configuration page for WebFetch "
+ ."not defined" );
+ }
+ my ( $web, $topic ) = split /\./, $self->{config_topic};
+ WebFetch::debug "config_topic: ".$self->{config_topic}
+ ." -> $web, $topic";
+ if (( ! defined $web ) or ( ! defined $topic )) {
+ throw_twiki_no_config( "TWiki configuration page for WebFetch "
+ ."must be defined in the format web.topic" );
+ }
+
+ # check if a config_key was specified before we read the configuration
+ if ( !exists $self->{config_key}) {
+ throw_twiki_no_config( "TWiki configuration key for WebFetch "
+ ."not defined" );
+ }
+
+ # read the configuration info
+ my $config = TWiki::Func::readTopic( $web, $topic );
+
+ # if STARTINCLUDE and STOPINCLUDE are present, use only what's between
+ if ( $config =~ /%STARTINCLUDE%\s*(.*)\s*%STOPINCLUDE%/s ) {
+ $config = $1;
+ }
+
+ # parse the configuration
+ WebFetch::debug "parsing configuration";
+ my ( @fnames, $line );
+ $self->{twiki_config_all} = [];
+ $self->{twiki_keys} = {};
+ foreach $line ( split /\r*\n+/s, $config ) {
+ if ( $line =~ /^\|\s*(.*)\s*\|\s*$/ ) {
+ my @entries = split /\s*\|\s*/, $1;
+ WebFetch::debug "read entries: ".join( ', ', @entries );
+
+ # first line contains field headings
+ if ( ! @fnames) {
+ # save table headings as field names
+ my $field;
+ foreach $field ( @entries ) {
+ my $tmp = lc($field);
+ $tmp =~ s/\W//g;
+ push @fnames, $tmp;
+ }
+ next;
+ }
+ WebFetch::debug "field names: ".join " ", @fnames;
+
+ # save the entries
+ # it isn't a heading row if we got here
+ # transfer array @entries to named fields in %config
+ WebFetch::debug "data row: ".join " ", @entries;
+ my ( $i, $key, %config );
+ for ( $i=0; $i < scalar @fnames; $i++ ) ...
[truncated message content] |
|
From: <ik...@us...> - 2009-09-21 04:37:51
|
Revision: 67
http://webfetch.svn.sourceforge.net/webfetch/?rev=67&view=rev
Author: ikluft
Date: 2009-09-21 04:37:45 +0000 (Mon, 21 Sep 2009)
Log Message:
-----------
merge from trunk
Property Changed:
----------------
branches/v0.13/
Property changes on: branches/v0.13
___________________________________________________________________
Modified: svn:mergeinfo
- /trunk:38,44-64
+ /trunk:38,44-66
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-21 04:37:04
|
Revision: 66
http://webfetch.svn.sourceforge.net/webfetch/?rev=66&view=rev
Author: ikluft
Date: 2009-09-21 04:36:51 +0000 (Mon, 21 Sep 2009)
Log Message:
-----------
update version to 0.13 for release
Modified Paths:
--------------
branches/v0.13/lib/WebFetch.pm
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-21 04:35:48 UTC (rev 65)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-21 04:36:51 UTC (rev 66)
@@ -187,7 +187,7 @@
);
# initialize class variables
-our $VERSION = '0.13-pre59';
+our $VERSION = '0.13';
our %default_modules = (
"input" => {
"rss" => "WebFetch::Input::RSS",
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-21 04:36:05
|
Revision: 65
http://webfetch.svn.sourceforge.net/webfetch/?rev=65&view=rev
Author: ikluft
Date: 2009-09-21 04:35:48 +0000 (Mon, 21 Sep 2009)
Log Message:
-----------
sync with trunk
Property Changed:
----------------
branches/v0.13/
Property changes on: branches/v0.13
___________________________________________________________________
Modified: svn:mergeinfo
- /trunk:38,44-62
+ /trunk:38,44-64
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-21 04:22:47
|
Revision: 64
http://webfetch.svn.sourceforge.net/webfetch/?rev=64&view=rev
Author: ikluft
Date: 2009-09-21 04:22:34 +0000 (Mon, 21 Sep 2009)
Log Message:
-----------
docs
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Output/TWiki.pm
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-14 23:32:54 UTC (rev 63)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-21 04:22:34 UTC (rev 64)
@@ -104,10 +104,34 @@
$obj->do_actions; # process output
$obj->save; # save results
-=head1 FUNCTIONS
+=head1 configuration from TWiki topic
-=head2 get_twiki_config
+The configuration information on feeds is kept in a TWiki page. You can
+specify any page with a web and topic name, for example C<--config_topic=Feeds.WebFetchConfig> .
+The contents of that configuration page could look like this, though with
+any feeds you want to configure. The "Key" field matches the --config_key
+command-line parameter, and then brings in the rest of the configuration
+info from that line. An example is shown below.
+
+=over
+C<< ---+ !WebFetch Configuration >>
+
+C<< The following table is used by !WebFetch to configure news feeds >>
+
+C<< %STARTINCLUDE% >>
+C<< | *Key* | *Web* | *Parent* | *Prefix* | *Template* | *Form* | *Options* | *Modul >>
+e* | *Source* |
+C<< | ikluft-twitter | Feeds | TwitterIkluftFeed | TwitterIkluft | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://twitter.com/statuses/user_timeline/37786023.rss | >>
+C<< | twiki-twitter | Feeds | TwitterTwikiFeed | TwitterTwiki | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://search.twitter.com/search.atom?q=%23twiki | >>
+C<< | cnn | Feeds | RssCnn | RssCnn | RssFeedTemplate | RssFeedForm | separate_topics | RSS | http://rss.cnn.com/rss/cnn_topstories.rss | >>
+C<< %STOPINCLUDE% >>
+=back
+
+The C<%STARTINCLUDE%> and C<%STOPINCLUDE%> are not required. However, if
+present, they are used as boundaries for the inclusion like in a normal
+INCLUDE operation on TWiki.
+
=cut
# read the TWiki configuation
@@ -219,10 +243,7 @@
WebFetch::debug "twiki_config: ".join( " ", %{$self->{twiki_config}});
}
-=head2 write_to_twiki
-
-=cut
-
+# write to a TWiki page
sub write_to_twiki
{
my $self = shift;
@@ -269,10 +290,7 @@
}
}
-=head2 write_to_twiki_topics
-
-=cut
-
+# write to separate TWiki topics
sub write_to_twiki_topics
{
my $self = shift;
@@ -380,10 +398,7 @@
}
}
-=head2 write_to_twiki_metadata
-
-=cut
-
+# write to successive items of TWiki metadata
sub write_to_twiki_metadata
{
my $self = shift;
@@ -475,10 +490,6 @@
}
}
-=head2 fmt_handler_twiki
-
-=cut
-
# TWiki format handler
sub fmt_handler_twiki
{
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-14 23:33:01
|
Revision: 63
http://webfetch.svn.sourceforge.net/webfetch/?rev=63&view=rev
Author: ikluft
Date: 2009-09-14 23:32:54 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
merge from trunk
Property Changed:
----------------
branches/v0.13/
Property changes on: branches/v0.13
___________________________________________________________________
Modified: svn:mergeinfo
- /trunk:38
+ /trunk:38,44-62
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-14 23:20:39
|
Revision: 62
http://webfetch.svn.sourceforge.net/webfetch/?rev=62&view=rev
Author: ikluft
Date: 2009-09-14 23:20:26 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
streamline, add missing function
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Data/Record.pm
branches/v0.13/lib/WebFetch/Data/Store.pm
Modified: branches/v0.13/lib/WebFetch/Data/Record.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-14 23:19:20 UTC (rev 61)
+++ branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-14 23:20:26 UTC (rev 62)
@@ -30,9 +30,9 @@
=head1 SYNOPSIS
-C<use WebFetch::Data::Record;
+C<use WebFetch::Data::Record;>
-WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
+C<WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
$value = $obj-E<gt>bynum( $num );
$value = $obj->fieldname;
$obj->fieldname( $value );
@@ -74,7 +74,7 @@
}
# shortcut function to top-level WebFetch object data
-sub data { return $_[0]->{obj}->data; }
+sub data { return $_[0]->{obj}; }
=item $value = $obj->bynum( $field_num );
@@ -88,11 +88,34 @@
{
my $self = shift;
my $f = shift;
- my $num = $self->{num};
+ WebFetch::debug "bynum $f";
return $self->{recref}[$f];
}
+=item $value = $obj->byname( $field_name );
+
+Returns the value of the named field.
+
+=cut
+
+# get a field by name
+sub byname
+{
+ my $self = shift;
+ my $fname = shift;
+ my $obj = $self->{obj};
+ my $f;
+
+ WebFetch::debug "byname ".(( defined $fname ) ? $fname : "undef");
+ ( defined $fname ) or return undef;
+ if ( exists $obj->{findex}{$fname}) {
+ $f = $obj->{findex}{$fname};
+ return $self->{recref}[$f];
+ }
+ return undef;
+}
+
=item $class->mk_field_accessor( $field_name, ... );
Creates accessor functions for each field name provided.
Modified: branches/v0.13/lib/WebFetch/Data/Store.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-14 23:19:20 UTC (rev 61)
+++ branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-14 23:20:26 UTC (rev 62)
@@ -33,9 +33,9 @@
=head1 SYNOPSIS
-C<use WebFetch::Data::Store;
+C<use WebFetch::Data::Store;>
-$data = webfetch_obj-E<gt>data;
+C<$data = webfetch_obj-E<gt>data;
$data-E<gt>add_fields( "field1", "field2", ... );
$num = $data-E<gt>num_fields;
@field_names = $data-E<gt>get_fields;
@@ -301,7 +301,8 @@
# convert well-known name to field name
sub wk2fname
{
- my ( $self, $wk ) = @_;
+ my $self = shift;
+ my $wk = shift;
WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef");
return ( exists $self->{wk_names}{$wk})
@@ -318,7 +319,8 @@
# convert a field name to a field number
sub fname2fnum
{
- my ( $self, $fname ) = @_;
+ my $self = shift;
+ my $fname = shift;
WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" );
return ( exists $self->{findex}{$fname})
@@ -335,7 +337,8 @@
# convert well-known name to field number
sub wk2fnum
{
- my ( $self, $wk ) = @_;
+ my $self = shift;
+ my $wk = shift;
WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" );
return ( exists $self->{wkindex}{$wk})
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-14 23:19:31
|
Revision: 61
http://webfetch.svn.sourceforge.net/webfetch/?rev=61&view=rev
Author: ikluft
Date: 2009-09-14 23:19:20 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
first cut at multiple records per topic
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Output/TWiki.pm
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-14 23:17:37 UTC (rev 60)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-14 23:19:20 UTC (rev 61)
@@ -50,10 +50,10 @@
description => "WebFetch::Output::TWiki returned errors from "
."saving one or more entries",
},
- "WebFetch::Output::TWiki::Exception::NoIdentifier" => {
+ "WebFetch::Output::TWiki::Exception::FieldNotSpecified" => {
isa => "WebFetch::Exception",
- alias => "throw_no_id_field",
- description => "no identifier field could be found",
+ alias => "throw_field_not_specified",
+ description => "a required field was not defined or found",
},
);
@@ -172,27 +172,18 @@
foreach $line ( split /\r*\n+/s, $config ) {
if ( $line =~ /^\|\s*(.*)\s*\|\s*$/ ) {
my @entries = split /\s*\|\s*/, $1;
+ WebFetch::debug "read entries: ".join( ', ', @entries );
- # check first line for field headings
- if ( ! @{$self->{twiki_config_all}}) {
- if ( $entries[0] =~ /\*\w+\*/ ) {
- # save table headings as field names
- my $field;
- foreach $field ( @entries ) {
- if ( $field =~ /\*(\w*)\*/ )
- {
- push @fnames, lc($1);
- } else {
- my $tmp = lc($field);
- $tmp =~ s/\W//g;
- push @fnames, $tmp;
- }
- }
- next;
- } else {
- # use default field names
- @fnames = @default_field_names;
+ # first line contains field headings
+ if ( ! @fnames) {
+ # save table headings as field names
+ my $field;
+ foreach $field ( @entries ) {
+ my $tmp = lc($field);
+ $tmp =~ s/\W//g;
+ push @fnames, $tmp;
}
+ next;
}
WebFetch::debug "field names: ".join " ", @fnames;
@@ -239,29 +230,67 @@
# get config variables
$config = $self->{twiki_config};
- foreach $name ( qw( key web parent prefix template form )) {
- if ( !exists $self->{twiki_config}{$name}) {
- throw_twiki_config_missing( "missing config parameter "
- .$name );
+
+ # parse options
+ my ( $option );
+ $self->{twiki_options} = {};
+ foreach $option ( split /\s+/, $self->{twiki_config}{options}) {
+ if ( $option =~ /^([^=]+)=(.*)/ ) {
+ $self->{twiki_options}{$1} = $2;
+ } else {
+ $self->{twiki_options}{$option} = 1;
}
}
- # get text of template topic
- my $template = TWiki::Func::readTopic( $config->{web},
- $config->{template});
-
# determine unique identifier field
- my $id_field = $self->wk2fnum( "id" );
+ my $id_field;
+ if ( exists $self->{twiki_options}{id_field}) {
+ $id_field = $self->{twiki_options}{id_field};
+ }
if ( ! defined $id_field ) {
- $id_field = $self->wk2fnum( "url" );
+ $id_field = $self->wk2fname( "id" );
}
if ( ! defined $id_field ) {
- $id_field = $self->wk2fnum( "title" );
+ $id_field = $self->wk2fname( "url" );
}
if ( ! defined $id_field ) {
- throw_no_id_field "no usable identifier field was found";
+ $id_field = $self->wk2fname( "title" );
}
-
+ if ( ! defined $id_field ) {
+ throw_field_not_specified "identifier field not specified";
+ }
+ $self->{id_field} = $id_field;
+
+ # determine from options whether each item is making metadata or topics
+ if ( exists $self->{twiki_options}{separate_topics}) {
+ $self->write_to_twiki_topics;
+ } else {
+ $self->write_to_twiki_metadata;
+ }
+}
+
+=head2 write_to_twiki_topics
+
+=cut
+
+sub write_to_twiki_topics
+{
+ my $self = shift;
+
+ # get config variables
+ my $config = $self->{twiki_config};
+ my $name;
+ foreach $name ( qw( key web parent prefix template form )) {
+ if ( !exists $self->{twiki_config}{$name}) {
+ throw_twiki_config_missing( "missing config parameter "
+ .$name );
+ }
+ }
+
+ # get text of template topic
+ my ($meta, $template ) = TWiki::Func::readTopic( $config->{web},
+ $config->{template});
+
# open DB file for tracking unique IDs of articles already processed
my %id_index;
tie %id_index, 'DB_File',
@@ -280,14 +309,15 @@
# create topics with metadata from each WebFetch data record
my $entry;
my @oopses;
+ my $id_field = $self->{id_field};
$self->data->reset_pos;
while ( $entry = $self->data->next_record ) {
# check that this entry hasn't already been forwarded to TWiki
- if ( exists $id_index{$entry->bynum( $id_field )}) {
+ if ( exists $id_index{$entry->byname( $id_field )}) {
next;
}
- $id_index{$entry->bynum( $id_field )} = time;
+ $id_index{$entry->byname( $id_field )} = time;
# select topic name
my $topicname = sprintf $tnum_format, $tnum_counter;
@@ -298,11 +328,10 @@
$tnum_counter++;
$topics{$topicname} = 1;
my $text = $template;
- WebFetch::debug "write_to_twiki: writing $topicname";
+ WebFetch::debug "write_to_twiki_topics: writing $topicname";
# create topic metadata
- my $meta = TWiki::Meta->new ( $self->{twiki_obj},
- $config->{web}, $topicname );
+ #my $meta = TWiki::Meta->new ( $self->{twiki_obj}, $config->{web}, $topicname );
$meta->put( "TOPICPARENT",
{ name => $config->{parent}});
$meta->put( "FORM", { name => $config->{form}});
@@ -338,7 +367,8 @@
my $oopsurl = TWiki::Func::saveTopic( $config->{web},
$topicname, $meta, $text );
if ( $oopsurl ) {
- WebFetch::debug "write_to_twiki: $topicname - $oopsurl";
+ WebFetch::debug "write_to_twiki_topics: "
+ ."$topicname - $oopsurl";
push @oopses, $entry->title." -> "
.$topicname." ".$oopsurl;
}
@@ -346,10 +376,105 @@
# check for errors
if ( @oopses ) {
- throw_twiki_oops( "These saves failed:\n".join "\n", @oopses );
+ throw_twiki_oops( "TWiki saves failed:\n".join "\n", @oopses );
}
}
+=head2 write_to_twiki_metadata
+
+=cut
+
+sub write_to_twiki_metadata
+{
+ my $self = shift;
+
+ # get config variables
+ my $config = $self->{twiki_config};
+ my $name;
+ foreach $name ( qw( key web parent )) {
+ if ( !exists $self->{twiki_config}{$name}) {
+ throw_twiki_config_missing( "missing config parameter "
+ .$name );
+ }
+ }
+
+ # determine metadata title field
+ my $title_field;
+ if ( exists $self->{twiki_options}{title_field}) {
+ $title_field = $self->{twiki_options}{title_field};
+ }
+ if ( ! defined $title_field ) {
+ $title_field = $self->wk2fname( "title" );
+ }
+ if ( ! defined $title_field ) {
+ throw_field_not_specified "title field not specified";
+ }
+
+ # determine metadata value field
+ my $value_field;
+ if ( exists $self->{twiki_options}{value_field}) {
+ $value_field = $self->{twiki_options}{value_field};
+ }
+ if ( ! defined $value_field ) {
+ $value_field = $self->wk2fname( "summary" );
+ }
+ if ( ! defined $value_field ) {
+ throw_field_not_specified "value field not specified";
+ }
+
+ # open DB file for tracking unique IDs of articles already processed
+ my %id_index;
+ tie %id_index, 'DB_File',
+ $self->{dir}."/".$config->{key}."_id_index.db",
+ &DB_File::O_CREAT|&DB_File::O_RDWR, 0640;
+
+ # get text of topic
+ my ($meta, $text) = TWiki::Func::readTopic( $config->{web},
+ $config->{parent});
+
+ # start metadata line counter
+ my $mnum_counter = 0;
+ my $mnum_format = "line-%07d";
+
+ # create metadata lines for each entry
+ my $entry;
+ my @oopses;
+ my $id_field = $self->{id_field};
+ $self->data->reset_pos;
+ while ( $entry = $self->data->next_record ) {
+ # check that this entry hasn't already been forwarded to TWiki
+ if ( exists $id_index{$entry->byname( $id_field )}) {
+ next;
+ }
+ $id_index{$entry->byname( $id_field )} = time;
+
+ # select metadata field name
+ my ( $value, $metaname );
+ $value = $meta->get( "FIELD",
+ $metaname = sprintf( $mnum_format, $mnum_counter ));
+ while ( defined $value ) {
+ $value = $meta->get( "FIELD",
+ $metaname = sprintf( $mnum_format,
+ ++$mnum_counter ));
+ }
+
+ # write the value
+ $meta->putKeyed( "FIELD", {
+ name => $metaname,
+ title => $entry->byname( $title_field ),
+ value => $entry->byname( $value_field ),
+ });
+ }
+
+ # save the topic
+ my $oopsurl = TWiki::Func::saveTopic( $config->{web},
+ $config->{parent}, $meta, $text );
+ if ( $oopsurl ) {
+ throw_twiki_oops "TWiki saves failed: "
+ .$config->{parent}." ".$oopsurl;
+ }
+}
+
=head2 fmt_handler_twiki
=cut
@@ -366,7 +491,7 @@
# write to TWiki topic
$self->write_to_twiki;
- # no savables for WebFetch::save - mark it OK
+ # no savables - mark it OK so WebFetch::save won't call it an error
$self->no_savables_ok;
1;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-14 23:17:50
|
Revision: 60
http://webfetch.svn.sourceforge.net/webfetch/?rev=60&view=rev
Author: ikluft
Date: 2009-09-14 23:17:37 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
avoid use of unref
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/Atom.pm
Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-14 23:15:55 UTC (rev 59)
+++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-14 23:17:37 UTC (rev 60)
@@ -133,7 +133,8 @@
# save the data record
my $id = extract_value( $entry->id() );
my $title = extract_value( $entry->title() );
- my $author = extract_value( $entry->author->name );
+ my $author = ( defined $entry->author )
+ ? extract_value( $entry->author->name ) : "";
my $link = extract_value( $entry->link->href );
my $updated = extract_value( $entry->updated() );
my $summary = extract_value( $entry->summary() );
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-14 23:16:07
|
Revision: 59
http://webfetch.svn.sourceforge.net/webfetch/?rev=59&view=rev
Author: ikluft
Date: 2009-09-14 23:15:55 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
streamline data wrapper functions
Modified Paths:
--------------
branches/v0.13/lib/WebFetch.pm
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-09 00:38:53 UTC (rev 58)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-14 23:15:55 UTC (rev 59)
@@ -187,7 +187,7 @@
);
# initialize class variables
-our $VERSION = '0.13-pre39';
+our $VERSION = '0.13-pre59';
our %default_modules = (
"input" => {
"rss" => "WebFetch::Input::RSS",
@@ -961,8 +961,6 @@
=back
-=back
-
=cut
sub do_actions
@@ -1545,9 +1543,9 @@
#
sub data { my $self = shift; return $self->{data}; }
-sub wk2fname { my $self = shift; return $self->data->wk2fname( @_ )};
-sub fname2fnum { my $self = shift; return $self->data->fname2fnum( @_ )};
-sub wk2fnum { my $self = shift; return $self->data->wk2fnum( @_ )};
+sub wk2fname { my $self = shift; return $self->{data}->wk2fname( @_ )};
+sub fname2fnum { my $self = shift; return $self->{data}->fname2fnum( @_ )};
+sub wk2fnum { my $self = shift; return $self->{data}->wk2fnum( @_ )};
=item AUTOLOAD functionality
@@ -1660,6 +1658,9 @@
Send patches, bug reports, suggestions and questions to
C<ma...@we...>.
+Some changes in versions 0.12-0.13 (Aug-Sep 2009) were made for and
+sponsored by Twiki Inc (formerly TWiki.Net).
+
=head1 LICENSE
WebFetch is Open Source software distributed via the
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-09 00:39:05
|
Revision: 58
http://webfetch.svn.sourceforge.net/webfetch/?rev=58&view=rev
Author: ikluft
Date: 2009-09-09 00:38:53 +0000 (Wed, 09 Sep 2009)
Log Message:
-----------
update MANIFEST
Modified Paths:
--------------
branches/v0.13/MANIFEST
Modified: branches/v0.13/MANIFEST
===================================================================
--- branches/v0.13/MANIFEST 2009-09-09 00:38:23 UTC (rev 57)
+++ branches/v0.13/MANIFEST 2009-09-09 00:38:53 UTC (rev 58)
@@ -12,6 +12,8 @@
lib/WebFetch/Output/Dump.pm
lib/WebFetch/Output/TT.pm
lib/WebFetch/Output/TWiki.pm
+lib/WebFetch/Data/Store.pm
+lib/WebFetch/Data/Record.pm
t/00-load.t
t/boilerplate.t
t/pod-coverage.t
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-09 00:38:29
|
Revision: 57
http://webfetch.svn.sourceforge.net/webfetch/?rev=57&view=rev
Author: ikluft
Date: 2009-09-09 00:38:23 +0000 (Wed, 09 Sep 2009)
Log Message:
-----------
update tests
Modified Paths:
--------------
branches/v0.13/t/00-load.t
branches/v0.13/t/pod-coverage.t
Modified: branches/v0.13/t/00-load.t
===================================================================
--- branches/v0.13/t/00-load.t 2009-09-09 00:37:27 UTC (rev 56)
+++ branches/v0.13/t/00-load.t 2009-09-09 00:38:23 UTC (rev 57)
@@ -1,9 +1,11 @@
#!perl -T
-use Test::More tests => 8;
+use Test::More tests => 10;
BEGIN {
use_ok( 'WebFetch' );
+ use_ok( 'WebFetch::Data::Store' );
+ use_ok( 'WebFetch::Data::Record' );
use_ok( 'WebFetch::Input::Atom' );
use_ok( 'WebFetch::Input::PerlStruct' );
use_ok( 'WebFetch::Input::RSS' );
Modified: branches/v0.13/t/pod-coverage.t
===================================================================
--- branches/v0.13/t/pod-coverage.t 2009-09-09 00:37:27 UTC (rev 56)
+++ branches/v0.13/t/pod-coverage.t 2009-09-09 00:38:23 UTC (rev 57)
@@ -20,7 +20,9 @@
# test Pod coverage on everything else
my %test_plan = (
"WebFetch" => [ qw( new debug fetch_main2 module_select
- singular_handler init_fname2fnum init_wk2fnum ) ],
+ singular_handler fname2fnum wk2fname wk2fnum ) ],
+ "WebFetch::Data::Store" => [ qw( new init ) ],
+ "WebFetch::Data::Record" => [ qw( new init data ) ],
"WebFetch::Output::TT" => [ qw( new fetch ) ],
"WebFetch::Output::Dump" => [ qw( new fetch ) ],
"WebFetch::Output::TWiki" => [ qw( new fetch ) ],
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-09 00:37:48
|
Revision: 56
http://webfetch.svn.sourceforge.net/webfetch/?rev=56&view=rev
Author: ikluft
Date: 2009-09-09 00:37:27 +0000 (Wed, 09 Sep 2009)
Log Message:
-----------
overhaul API
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/Atom.pm
branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
branches/v0.13/lib/WebFetch/Input/RSS.pm
branches/v0.13/lib/WebFetch/Input/SiteNews.pm
branches/v0.13/lib/WebFetch/Output/TT.pm
branches/v0.13/lib/WebFetch/Output/TWiki.pm
branches/v0.13/lib/WebFetch.pm
Added Paths:
-----------
branches/v0.13/lib/WebFetch/Data/
branches/v0.13/lib/WebFetch/Data/Record.pm
branches/v0.13/lib/WebFetch/Data/Store.pm
Added: branches/v0.13/lib/WebFetch/Data/Record.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Record.pm (rev 0)
+++ branches/v0.13/lib/WebFetch/Data/Record.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -0,0 +1,189 @@
+#
+# WebFetch::Data::Record - WebFetch Embedding API data record
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+
+package WebFetch::Data::Record;
+
+use strict;
+use warnings;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+ 'WebFetch::Data::Record::Exception::AutoloadFailure' => {
+ isa => 'WebFetch::TracedException',
+ alias => 'throw_autoload_fail',
+ description => "AUTOLOAD failed to handle function call",
+ },
+
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Record - Object for management of WebFetch data records/rows
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Record;
+
+WebFetch::Data::Record->mk_field_accessor( $field_name, ... );
+$value = $obj-E<gt>bynum( $num );
+$value = $obj->fieldname;
+$obj->fieldname( $value );
+>
+
+=head1 DESCRIPTION
+
+This module provides read-only access to a single record of the WebFetch data.
+
+=cut
+
+our $AUTOLOAD;
+
+# initialization
+sub init
+{
+ my $self = shift;
+
+ # save parameters
+ $self->{obj} = shift;
+ $self->{num} = shift;
+ $self->{recref} = $self->{obj}{records}[$self->{num}];
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ # make accessor functions
+ my $field;
+ my $class = ref( $self );
+ foreach $field ( @{$self->{obj}{fields}}) {
+ $class->mk_field_accessor( $field );
+ }
+ foreach $field ( keys %{$self->{obj}{wk_names}}) {
+ $class->mk_field_accessor( $field );
+ }
+
+ return $self;
+}
+
+# shortcut function to top-level WebFetch object data
+sub data { return $_[0]->{obj}->data; }
+
+=item $value = $obj->bynum( $field_num );
+
+Returns the value of the field located by the field number provided.
+The first field is numbered 0.
+
+=cut
+
+# get a field by number
+sub bynum
+{
+ my $self = shift;
+ my $f = shift;
+ my $num = $self->{num};
+
+ return $self->{recref}[$f];
+}
+
+=item $class->mk_field_accessor( $field_name, ... );
+
+Creates accessor functions for each field name provided.
+
+=cut
+
+# make field accessor/mutator functions
+sub mk_field_accessor
+{
+ my $class = shift;
+ my $name;
+
+ foreach $name ( @_ ) {
+ no strict 'refs';
+ $class->can( $name ) and next; # skip if function exists!
+
+ # make a closure which keeps value of $name from this call
+ # keep generic so code can use more than one data type per run
+ *{$class."::".$name} = sub {
+ my $self = shift;
+ my $value = shift;
+ my $obj = $self->{obj};
+ my $recref = $self->{recref};
+ my $f;
+ if ( exists $obj->{findex}{$name}) {
+ $f = $obj->{findex}{$name};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } elsif ( exists $obj->{wk_names}{$name}) {
+ my $wk = $obj->{wk_names}{$name};
+ $f = $obj->{findex}{$wk};
+ if ( defined $value ) {
+ my $tmp = $recref->[$f];
+ $recref->[$f] = $value;
+ return $tmp;
+ } else {
+ return $recref->[$f];
+ }
+ } else {
+ return undef;
+ }
+ };
+ }
+}
+
+=item accessor functions
+
+Accessor functions are created for field names and
+well-known names as they are defined.
+
+So a field named "title" can be accessed by an object method of the same
+name, like $obj->title .
+
+=cut
+
+# AUTOLOAD function to provide field accessors/mutators
+sub AUTOLOAD
+{
+ my $self = shift;
+ my $type = ref($self) or throw_autoload_fail "self is not an object";
+
+ my $name = $AUTOLOAD;
+ $name =~ s/.*://; # strip fully-qualified portion, just want function
+
+ # decline all-caps names - reserved for special Perl functions
+ ( $name =~ /^[A-Z]+$/ ) and return;
+
+ WebFetch::debug __PACKAGE__."::AUTOLOAD $name";
+ if (( exists $self->{obj}{findex}{$name})
+ or ( exists $self->{obj}{wk_names}{$name}))
+ {
+ $type->mk_field_accessor( $name );
+ return $self->$name(@_);
+ } else {
+ throw_autoload_fail "no such function or field $name";
+ }
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Added: branches/v0.13/lib/WebFetch/Data/Store.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Data/Store.pm (rev 0)
+++ branches/v0.13/lib/WebFetch/Data/Store.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -0,0 +1,356 @@
+#
+# WebFetch::Data::Store - WebFetch Embedding API top-level data store
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+#
+# The WebFetch Embedding API manages the following data:
+# * {data} - top level hash container (WebFetch::Data::Store)
+# * {fields} - array of field names
+# * {records} - array of data records (WebFetch::Data::Record)
+# * each record is an array of data fields in the order of the field names
+# * {wk_names} - hash of WebFetch well-known fields to actual field names
+# * {feed} - top-level arbitrary info about the feed
+#
+
+package WebFetch::Data::Store;
+
+use strict;
+use warnings;
+use WebFetch;
+use base qw( WebFetch );
+
+# define exceptions/errors
+use Exception::Class (
+);
+
+# no user-servicable parts beyond this point
+
+=head1 NAME
+
+WebFetch::Data::Store - Object for management of WebFetch data
+
+=head1 SYNOPSIS
+
+C<use WebFetch::Data::Store;
+
+$data = webfetch_obj-E<gt>data;
+$data-E<gt>add_fields( "field1", "field2", ... );
+$num = $data-E<gt>num_fields;
+@field_names = $data-E<gt>get_fields;
+$name = $data-E<gt>field_bynum( 3 );
+$data-E<gt>add_wk_names( "title" =E<gt> "heading", "url" =E<gt> "link", ... );
+$value = $data-E<gt>get_feed( $name );
+$data-E<gt>set_feed( $name, $value );
+$data-E<gt>add_record( $field1, $field2, ... ); # order corresponds to add_fields
+$num = $data-E<gt>num_records;
+$record = $data-E<gt>get_record( $n );
+$data-E<gt>reset_pos;
+$record = $data-E<gt>next_record;
+$name = $data-E<gt>wk2fname( $wk_name );
+$num = $data-E<gt>fname2fnum( $field_name );
+$num = $data-E<gt>wk2fnum( $wk_name );
+>
+
+=head1 DESCRIPTION
+
+This module provides access to the WebFetch data.
+WebFetch instantiates the object for the input module.
+The input module uses this to construct the data set from its input.
+The output module uses the this to access the data and
+produce its output object/format.
+
+=cut
+
+# initialization
+sub init
+{
+ my $self = shift;
+ $self->{fields} = [];
+ $self->{findex} = {};
+ $self->{records} = [];
+ $self->{wk_names} = {};
+ $self->{wkindex} = {};
+ $self->{feed} = {};
+
+ # signal WebFetch that Data subclasses do not provide a fetch function
+ $self->{no_fetch} = 1;
+ $self->SUPER::init( @_ );
+
+ return $self;
+}
+
+=item $obj->add_fields( "field1", "field2", ... );
+
+Add the field names in the order their values will appear in the data table.
+
+=cut
+
+# add field names
+sub add_fields
+{
+ my $self = shift;
+ my @fields = @_;
+ my $field;
+ foreach $field ( @fields ) {
+ $self->{findex}{$field} = scalar @{$self->{fields}};
+ push @{$self->{fields}}, $field;
+ }
+}
+
+=item $num = $obj->num_fields;
+
+Returns the number of fields/columns in the data.
+
+=cut
+
+# get number of fields
+sub num_fields
+{
+ my $self = shift;
+ return scalar @{$self->{fields}};
+}
+
+=item @field_names = $obj->get_fields;
+
+Gets a list of the field names in the order their values appear in the data
+table;
+
+=cut
+
+# get field names
+sub get_fields
+{
+ my $self = shift;
+ return keys %{$self->{fields}};
+}
+
+=item $field_name = $obj->field_bynum( $num );
+
+Return a field name string based on the numeric position of the field.
+
+=cut
+
+# get field name by number
+sub field_bynum
+{
+ my $self = shift;
+ my $num = shift;
+ return $self->{fields}[$num];
+}
+
+=item $obj->add_wk_names( "title" => "heading", "url" => "link", ... );
+
+Add associations between WebFetch well-known field names, which allows
+WebFetch to apply meaning to these fields, such as titles, dates and URLs.
+The parameters are pairs of well-known and actual field names.
+Running this function more than once will add to the existing associations
+of well-known to actual field names.
+
+=cut
+
+# add well-known names
+sub add_wk_names
+{
+ my $self = shift;
+ my ( $wk_name, $field );
+
+ while ( @_ >= 2 ) {
+ $wk_name = shift;
+ $field = shift;
+ WebFetch::debug "add_wk_names $wk_name => $field";
+ $self->{wk_names}{$wk_name} = $field;
+ $self->{wkindex}{$wk_name} = $self->{findex}{$field};
+ }
+}
+
+=item $value = $obj->get_feed( $name );
+
+Get an item of per-feed data by name.
+
+=cut
+
+# get feed info
+sub get_feed
+{
+ my $self = shift;
+ my $name = shift;
+ return (exists $self->{$name}) ? $self->{$name} : undef;
+}
+
+=item $obj->set_feed( $name, $value );
+
+Set an item of per-feed data by name and value.
+
+=cut
+
+# set feed info
+sub set_feed
+{
+ my $self = shift;
+ my $name = shift;
+ my $value = shift;
+ my $retval = (exists $self->{$name}) ? $self->{$name} : undef;
+ $self->{$name} = $value;
+ return $retval;
+}
+
+=item $obj->add_record( $value1, $value2, $value3, ... );
+
+Add a row to the end of the data table. Values must correspond to the
+positions of the field names that were provided earlier.
+
+=cut
+
+# add a data record
+# this adds the field values in the same order the field names were added
+sub add_record
+{
+ my $self = shift;
+ push @{$self->{records}}, [ @_ ];
+}
+
+# TODO: add a function add_record_unordered( name => value, ... )
+# less efficient, but may be OK for cases where that doesn't matter
+
+=item $num = $obj->num_records;
+
+Get the number of records/rows in the data table.
+
+=cut
+
+# get the number of data records
+sub num_records
+{
+ my $self = shift;
+ return scalar @{$self->{records}};
+}
+
+=item $record = get_record( $num );
+
+Returns a WebFetch::Data::Record object for the row located
+by the given row number in the data table. The first row is numbered 0.
+Calling this function does not affect the position used by the next_record
+function.
+
+=cut
+
+# get a data record by index
+sub get_record
+{
+ my $self = shift;
+ my $n = shift;
+ WebFetch::debug "get_record $n";
+ require WebFetch::Data::Record;
+ return WebFetch::Data::Record->new( $self, $n );
+}
+
+=item $obj->reset_pos;
+
+Reset the position counter used by the next_record function back to the
+beginning of the data table.
+
+=cut
+
+# reset iterator position
+sub reset_pos
+{
+ my $self = shift;
+
+ WebFetch::debug "reset_pos";
+ delete $self->{pos};
+}
+
+=item $record = $obj->next_record;
+
+The first call to this function returns the first record.
+Each successive call to this function returns the following record until
+the end of the data table.
+After the last record, the function returns undef until
+reset_pos is called to reset it back to the beginning.
+
+=cut
+
+# get next record
+sub next_record
+{
+ my $self = shift;
+
+ # initialize if necessary
+ if ( !exists $self->{pos}) {
+ $self->{pos} = 0;
+ }
+ WebFetch::debug "next_record n=".$self->{pos}." of "
+ .scalar @{$self->{records}};
+
+ # return undef if position is out of bounds
+ ( $self->{pos} < 0 ) and return undef;
+ ( $self->{pos} > scalar @{$self->{records}} - 1 ) and return undef;
+
+ # get record
+ return $self->get_record( $self->{pos}++ );
+}
+
+=item $obj->wk2fname( $wk )
+
+Obtain a field name from a well-known name.
+
+=cut
+
+# convert well-known name to field name
+sub wk2fname
+{
+ my ( $self, $wk ) = @_;
+
+ WebFetch::debug "wk2fname $wk => ".(( exists $self->{wk_names}{$wk}) ? $self->{wk_names}{$wk} : "undef");
+ return ( exists $self->{wk_names}{$wk})
+ ? $self->{wk_names}{$wk}
+ : undef;
+}
+
+=item $obj->fname2fnum( $fname )
+
+Obtain a field number from a field name.
+
+=cut
+
+# convert a field name to a field number
+sub fname2fnum
+{
+ my ( $self, $fname ) = @_;
+
+ WebFetch::debug "fname2fnum $fname => ".(( exists $self->{findex}{$fname}) ? $self->{findex}{$fname} : "undef" );
+ return ( exists $self->{findex}{$fname})
+ ? $self->{findex}{$fname}
+ : undef;
+}
+
+=item $obj->wk2fnum( $wk )
+
+Obtain a field number from a well-known name.
+
+=cut
+
+# convert well-known name to field number
+sub wk2fnum
+{
+ my ( $self, $wk ) = @_;
+
+ WebFetch::debug "wk2fnum $wk => ".(( exists $self->{wkindex}{$wk}) ? $self->{wkindex}{$wk} : "undef" );
+ return ( exists $self->{wkindex}{$wk})
+ ? $self->{wkindex}{$wk}
+ : undef;
+}
+
+1;
+__END__
+=head1 AUTHOR
+
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
+
+=head1 SEE ALSO
+
+L<WebFetch>, L<WebFetch::Data::Record>
Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -80,21 +80,17 @@
my ( $self ) = @_;
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "id", "updated", "title", "author", "link",
- "summary", "content", "xml" ];
+ $self->data->add_fields( "id", "updated", "title", "author", "link",
+ "summary", "content", "xml" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"id" => "id",
"title" => "title",
"url" => "link",
"date" => "updated",
"summary" => "summary",
- };
- $self->{data}{records} = [];
+ );
- # process the links
-
# parse data file
$self->parse_input();
@@ -143,9 +139,8 @@
my $summary = extract_value( $entry->summary() );
my $content = extract_value( $entry->content() );
my $xml = $entry->as_xml();
- push @{$self->{data}{records}},
- [ $id, $updated, $title, $author, $link, $summary,
- $content, $xml ];
+ $self->data->add_record( $id, $updated, $title,
+ $author, $link, $summary, $content, $xml );
}
}
Modified: branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -44,45 +44,21 @@
if ( !defined $self->{content}) {
throw_nostruct "content struct does not exist";
}
- if ( ref($self->{content}) eq "HASH" ) {
+ if ( ref($self->{content})->isa( "WebFetch::Data::Store" )) {
+ $self->{data} = $self->{content};
+ return;
+ } elsif ( ref($self->{content}) eq "HASH" ) {
if (( exists $self->{content}{fields})
and ( exists $self->{content}{records})
and ( exists $self->{content}{wk_names}))
{
- # it's already in WebFetch Embedding data format
- $self->{data} = $self->{content};
+ $self->data->{fields} = $self->{content}{fields};
+ $self->data->{wk_names} = $self->{content}{wk_names};
+ $self->data->{records} = $self->{content}{records};
return;
}
}
- if ( ref($self->{content}) ne "ARRAY" ) {
- throw_badstruct "content is not an ARRAY ref "
- ."or WebFetch Embedding API data";
- }
-
- # build data structure
- $self->{data} = {};
- $self->{data}{fields} = [ sort keys %{$self->{content}[0]}];
- $self->{data}{records} = [];
- my ( $entry, $field );
- foreach $entry ( @{$self->{content}}) {
- my @new_entry;
- foreach $field ( @{$self->{data}{fields}}) {
- push @new_entry, ( exists $entry->{$field})
- ? $entry->{$field} : "";
- }
- push @{$self->{data}{records}}, \@new_entry;
- }
-
- # map the well-known field names
- if ( defined $self->{wk_names}) {
- $self->{data}{wk_names} = $self->{wk_names};
- } else {
- # if not provided, fake it the best we can
- $self->{data}{wk_names} = {};
- foreach $field ( @{$self->{data}{fields}}) {
- $self->{data}{wk_names}{$field} = $field;
- }
- }
+ throw_badstruct "content should be a WebFetch::Data::Store";
}
1;
Modified: branches/v0.13/lib/WebFetch/Input/RSS.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -54,18 +54,16 @@
my ( $self ) = @_;
# set up Webfetch Embedding API data
- $self->{data} = {};
- $self->{data}{fields} = [ "pubDate", "title", "link", "category",
- "description" ];
+ $self->data->add_fields( "pubDate", "title", "link", "category",
+ "description" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "link",
"date" => "pubDate",
"summary" => "description",
"category" => "category",
- };
- $self->{data}{records} = [];
+ );
# parse data file
$self->parse_input();
@@ -163,8 +161,8 @@
? $item->{category} : "";
my $description = ( defined $item->{description})
? $item->{description} : "";
- push @{$self->{data}{records}},
- [ $pub_date, $title, $link, $category, $description ];
+ $self->data->add_record( $pub_date, $title, $link,
+ $category, $description );
$pos++;
}
}
Modified: branches/v0.13/lib/WebFetch/Input/SiteNews.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -84,19 +84,17 @@
}
# set up Webfetch Embedding API data
- $self->{data} = {};
$self->{actions} = {};
- $self->{data}{fields} = [ "date", "title", "priority", "expired",
- "position", "label", "url", "category", "text" ];
+ $self->data->add_fields( "date", "title", "priority", "expired",
+ "position", "label", "url", "category", "text" );
# defined which fields match to which "well-known field names"
- $self->{data}{wk_names} = {
+ $self->data->add_wk_names(
"title" => "title",
"url" => "url",
"date" => "date",
"summary" => "text",
"category" => "category"
- };
- $self->{data}{records} = [];
+ );
# process the links
@@ -263,10 +261,10 @@
my $text = ( defined $item->{text}) ? $item->{text} : "";
my $url_prefix = ( defined $self->{url_prefix})
? $self->{url_prefix} : "";
- push @{$self->{data}{records}},
- [ printstamp($posted), $title, priority( $item ),
+ $self->data->add_record(
+ printstamp($posted), $title, priority( $item ),
expired( $item ), $pos, $label,
- $url_prefix."#".$label, $category, $text ];
+ $url_prefix."#".$label, $category, $text );
$pos++;
}
}
Modified: branches/v0.13/lib/WebFetch/Output/TT.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -80,7 +80,8 @@
my $template = Template->new( \%tt_config );
# process template
- $template->process( $self->{template}, $self->{data}, \$output )
+ $template->process( $self->{template}, { data => $self->{data}},
+ \$output, { binmode => ':utf8'} )
or throw_template $template->error();
$self->raw_savable( $filename, $output );
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -50,6 +50,11 @@
description => "WebFetch::Output::TWiki returned errors from "
."saving one or more entries",
},
+ "WebFetch::Output::TWiki::Exception::NoIdentifier" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_no_id_field",
+ description => "no identifier field could be found",
+ },
);
=head1 NAME
@@ -249,10 +254,13 @@
my $id_field = $self->wk2fnum( "id" );
if ( ! defined $id_field ) {
$id_field = $self->wk2fnum( "url" );
- } elsif ( ! defined $id_field ) {
+ }
+ if ( ! defined $id_field ) {
$id_field = $self->wk2fnum( "title" );
}
- WebFetch::debug "write_to_twiki: id_field=$id_field";
+ if ( ! defined $id_field ) {
+ throw_no_id_field "no usable identifier field was found";
+ }
# open DB file for tracking unique IDs of articles already processed
my %id_index;
@@ -272,13 +280,14 @@
# create topics with metadata from each WebFetch data record
my $entry;
my @oopses;
- foreach $entry ( @{$self->{data}{records}}) {
+ $self->data->reset_pos;
+ while ( $entry = $self->data->next_record ) {
# check that this entry hasn't already been forwarded to TWiki
- if ( exists $id_index{$entry->[$id_field]}) {
+ if ( exists $id_index{$entry->bynum( $id_field )}) {
next;
}
- $id_index{$entry->[$id_field]} = time;
+ $id_index{$entry->bynum( $id_field )} = time;
# select topic name
my $topicname = sprintf $tnum_format, $tnum_counter;
@@ -298,20 +307,21 @@
{ name => $config->{parent}});
$meta->put( "FORM", { name => $config->{form}});
my $fnum;
- for ( $fnum = 0; $fnum <= @{$self->{data}{fields}}; $fnum++ ) {
- WebFetch::debug "meta: ".$self->{data}{fields}[$fnum]
- ." = ".$entry->[$fnum];
- ( defined $self->{data}{fields}[$fnum]) or next;
- ( $self->{data}{fields}[$fnum] eq "xml") and next;
- ( defined $entry->[$fnum]) or next;
+ for ( $fnum = 0; $fnum <= $self->data->num_fields; $fnum++ ) {
+ WebFetch::debug "meta: "
+ .$self->data->field_bynum($fnum)
+ ." = ".$entry->bynum($fnum);
+ ( defined $self->data->field_bynum($fnum)) or next;
+ ( $self->data->field_bynum($fnum) eq "xml") and next;
+ ( defined $entry->bynum($fnum)) or next;
WebFetch::debug "meta: OK";
$meta->putKeyed( "FIELD", {
- name => $self->{data}{fields}[$fnum],
- value => $entry->[$fnum] });
+ name => $self->data->field_bynum($fnum),
+ value => $entry->bynum($fnum)});
}
# save a special title field for TWiki indexes
- my $index_title = $entry->[$self->wk2fnum( "title" )];
+ my $index_title = $entry->title;
$index_title =~ s/[\t\r\n\|]+/ /gs;
$index_title =~ s/^\s*//;
$index_title =~ s/\s*$//;
@@ -329,8 +339,8 @@
$topicname, $meta, $text );
if ( $oopsurl ) {
WebFetch::debug "write_to_twiki: $topicname - $oopsurl";
- push @oopses, $entry->[$self->wk2fnum( "title" ) or 0]
- ." -> ".$topicname." ".$oopsurl;
+ push @oopses, $entry->title." -> "
+ .$topicname." ".$oopsurl;
}
}
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-07 21:51:29 UTC (rev 55)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-09 00:37:27 UTC (rev 56)
@@ -100,12 +100,10 @@
use strict;
-use Carp;
use Getopt::Long;
use LWP::UserAgent;
use HTTP::Request;
use Date::Calc;
-use Data::Dumper;
# define exceptions/errors
use Exception::Class (
@@ -114,6 +112,12 @@
isa => 'WebFetch::Exception',
},
+ 'WebFetch::Exception::DataWrongType' => {
+ isa => 'WebFetch::TracedException',
+ alias => 'throw_data_wrongtype',
+ description => "provided data must be a WebFetch::Data::Store",
+ },
+
'WebFetch::Exception::GetoptError' => {
isa => 'WebFetch::Exception',
alias => 'throw_getopt_error',
@@ -174,16 +178,16 @@
description => "no module was found to run the request",
},
- 'WebFetch::Exception::AutoRunFailure' => {
+ 'WebFetch::Exception::AutoloadFailure' => {
isa => 'WebFetch::TracedException',
alias => 'throw_autoload_fail',
- description => "AUTORUN failed to handle function call",
+ description => "AUTOLOAD failed to handle function call",
},
);
# initialize class variables
-our $VERSION = '0.13-pre29';
+our $VERSION = '0.13-pre39';
our %default_modules = (
"input" => {
"rss" => "WebFetch::Input::RSS",
@@ -558,7 +562,16 @@
# go fetch the data
# this function must be provided by a derived module
+ # non-fetching modules (i.e. data) must define $self->{no_fetch}=1
if (( ! exists $self->{no_fetch}) or ! $self->{no_fetch}) {
+ require WebFetch::Data::Store;
+ if ( exists $self->{data}) {
+ $self->{data}->isa( "WebFetch::Data::Store" )
+ or throw_data_wrongtype "object data must be "
+ ."a WebFetch::Data::Store";
+ } else {
+ $self->{data} = WebFetch::Data::Store->new();
+ }
$self->fetch();
}
@@ -990,7 +1003,7 @@
foreach $entry ( @{$self->{actions}{$action_spec}}) {
# parameters must be in an ARRAY ref
if (ref $entry ne "ARRAY" ) {
- carp "warning: entry in action spec "
+ warn "warning: entry in action spec "
."\"".$action_spec."\""
."expected to be ARRAY, found "
.(ref $entry)." instead "
@@ -1007,7 +1020,7 @@
# it will be reported by $self->save()
}
} else {
- carp "warning: action \"$action_spec\" specified but "
+ warn "warning: action \"$action_spec\" specified but "
."\&{\$self->$action_handler}() "
."not defined in "
.(ref $self)." - ignored\n";
@@ -1325,7 +1338,6 @@
if ( $self->{debug} ) {
print STDERR "entering save()\n";
- #Dumper($self);
}
# check if we have attributes needed to proceed
@@ -1344,10 +1356,11 @@
if (( exists $self->{fetch_urls}) and $self->{fetch_urls}) {
my $url_fnum = $self->wk2fnum( "url" );
my $entry;
- foreach $entry ( @{$self->{data}{records}}) {
- if ( defined $entry->[$url_fnum]) {
- $self->direct_fetch_savable(
- $entry->[$url_fnum]);
+ $self->data->reset_pos;
+ while ( $entry = $self->data->next_record()) {
+ my $url = $entry->url;
+ if ( defined $url ) {
+ $self->direct_fetch_savable( $entry->url );
}
}
}
@@ -1356,8 +1369,8 @@
my $savable;
foreach $savable ( @{$self->{savable}}) {
- if ( $self->{debug} ) {
- print STDERR "saving ".$savable->{file}."\n";
+ if ( exists $savable->{file}) {
+ debug "saving ".$savable->{file}."\n";
}
# an output module may have handled a more intricate operation
@@ -1431,9 +1444,8 @@
}
# write content to the "new content" file
- if ( ! open ( new_content, ">$new_content" )) {
- $savable->{error} = "cannot open "
- .$new_content.": $!";
+ if ( ! open ( new_content, ">:utf8", "$new_content" )) {
+ $savable->{error} = "cannot open $new_content: $!";
next;
}
if ( !print new_content $savable->{content}) {
@@ -1529,148 +1541,14 @@
}
#
-# functions to support format handlers
+# shortcuts to data object functions
#
-# initialize an internal hash of field names to field numbers
-sub init_fname2fnum
-{
- my ( $self ) = @_;
+sub data { my $self = shift; return $self->{data}; }
+sub wk2fname { my $self = shift; return $self->data->wk2fname( @_ )};
+sub fname2fnum { my $self = shift; return $self->data->fname2fnum( @_ )};
+sub wk2fnum { my $self = shift; return $self->data->wk2fnum( @_ )};
- # check if fname2fnum is already initialized
- if (( exists $self->{fname2fnum})
- and ref $self->{fname2fnum} eq "HASH" )
- {
- # already done - success
- return 1;
- }
-
- # check if prerequisite data exists
- if (( ! exists $self->{data} )
- or ( ! exists $self->{data}{fields}))
- {
- # missing prerequisites - failed
- return 0;
- }
-
- # initialize the fname2fnum hash
- my $i;
- $self->{fname2fnum} = {};
- for ( $i=0; $i < scalar(@{$self->{data}{fields}}); $i++ ) {
- # put the field number in as the value for the hash
- $self->{fname2fnum}{$self->{data}{fields}[$i]} = $i;
- }
-
- # OK, done
- return 1;
-}
-
-# initialize an internal hash of well-known names to field numbers
-sub init_wk2fnum
-{
- my ( $self ) = @_;
-
- $self->init_fname2fnum() or return 0;
-
- # check if wk2fnum is already initialized
- if (( exists $self->{wk2fnum})
- and ref $self->{wk2fnum} eq "HASH" )
- {
- # already done - success
- return 1;
- }
-
- # check for prerequisite data
- if ( ! exists $self->{data}{wk_names}) {
- return 0;
- }
-
- my $wk_key;
- $self->{wk2fnum} = {};
- foreach $wk_key ( keys %{$self->{data}{wk_names}}) {
- # perform consistency cross-check between wk_names and fields
- if ( !exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}})
- {
- # wk_names has a bad field name - carp about it!
- carp "warning: wk_names contains $wk_key"."->"
- .$self->{data}{wk_names}{$wk_key}
- ." but "
- .$self->{data}{wk_names}{$wk_key}
- ." is not in the fields list - ignored\n";
- } else {
- # it's OK - put it in the table
- $self->{wk2fnum}{$wk_key} =
- $self->{fname2fnum}{$self->{data}{wk_names}{$wk_key}};
- }
- }
- return 1;
-}
-
-=item $obj->wk2fname( $wk )
-
-=cut
-
-# convert well-known name to field name
-sub wk2fname
-{
- my ( $self, $wk ) = @_;
-
- $self->init_fname2fnum() or return undef;
-
- # check for prerequisite data
- if (( ! exists $self->{data}{wk_names})
- or ( ! exists $self->{data}{wk_names}{$wk}))
- {
- return undef;
- }
-
- # double check that the field exists before pronouncing it OK
- # (perform consistency cross-check between wk_names and fields)
- if ( exists $self->{fname2fnum}{$self->{data}{wk_names}{$wk}}) {
- return $self->{data}{wk_names}{$wk};
- }
-
- # otherwise, wk_names has a bad field name.
- # But init_wk2fnum() may have already carped about it
- # so check whether we need to carp about it or not.
- if ( ! exists $self->{wk2fnum}) {
- carp "warning: wk_names contains $wk"."->"
- .$self->{data}{wk_names}{$wk}
- ." but "
- .$self->{data}{wk_names}{$wk}
- ." is not in the fields list - ignored\n";
- }
- return undef;
-}
-
-=item $obj->fname2fnum( $fname )
-
-=cut
-
-# convert a field name to a field number
-sub fname2fnum
-{
- my ( $self, $fname ) = @_;
-
- $self->init_fname2fnum() or return undef;
- return ( exists $self->{fname2fnum}{$fname})
- ? $self->{fname2fnum}{$fname} : undef;
-}
-
-=item $obj->wk2fnum( $wk )
-
-=cut
-
-# convert well-known name to field number
-sub wk2fnum
-{
- my ( $self, $wk ) = @_;
-
- $self->init_wk2fnum() or return undef;
- return ( exists $self->{wk2fnum}{$wk})
- ? $self->{wk2fnum}{$wk} : undef;
-}
-
=item AUTOLOAD functionality
When a WebFetch input object is passed to an output class, operations
@@ -1693,12 +1571,9 @@
$name =~ s/.*://; # strip fully-qualified portion, just want function
# decline all-caps names - reserved for special Perl functions
- # if Perl core didn't handle this name, we won't mess with it either
my ( $package, $filename, $line ) = caller;
- if ( $name =~ /^[A-Z]+$/ ) {
- throw_autoload_fail "reserved function $name declined "
- ." - called by $package ($filename line $line)";
- }
+ ( $name =~ /^[A-Z]+$/ ) and return;
+ debug __PACKAGE__."::AUTOLOAD $name";
# check for function in caller package
# (WebFetch may hand an input module's object to an output module)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 21:51:40
|
Revision: 55
http://webfetch.svn.sourceforge.net/webfetch/?rev=55&view=rev
Author: ikluft
Date: 2009-09-07 21:51:29 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
streamline autoload, exceptions
Modified Paths:
--------------
branches/v0.13/lib/WebFetch.pm
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-07 01:56:55 UTC (rev 54)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-07 21:51:29 UTC (rev 55)
@@ -146,6 +146,7 @@
'WebFetch::Exception::MustOverride' => {
isa => 'WebFetch::TracedException',
+ alias => 'throw_abstract',
description => "A WebFetch function was called which is "
."supposed to be overridden by a subclass",
},
@@ -1120,8 +1121,7 @@
# placeholder for fetch routines by derived classes
sub fetch
{
- WebFetch::Exception::MustOverride->throw(
- "fetch() function must be overridden by a derived module\n" );
+ throw_abstract "fetch is an abstract function and must be overridden";
}
@@ -1671,32 +1671,45 @@
? $self->{wk2fnum}{$wk} : undef;
}
-=item AUTOLOAD
+=item AUTOLOAD functionality
+When a WebFetch input object is passed to an output class, operations
+on $self would not usually work. WebFetch subclasses are considered to be
+cooperating with each other. So WebFetch provides AUTOLOAD functionality
+to catch undefined function calls for its subclasses. If the calling
+class provides a function by the name that was attempted, then it will
+be redirected there.
+
=cut
# autoloader catches calls to unknown functions
-# first try: redirect to the class which made the call, if the function exists
-# second try: act as a read-only accessor for object data
-# (want a read/write accessor? define the function explicitly)
+# redirect to the class which made the call, if the function exists
sub AUTOLOAD
{
my $self = shift;
my $type = ref($self) or throw_autoload_fail "self is not an object";
my $name = $AUTOLOAD;
- $name =~ s/.*://; # strip fully-qualified portion
+ $name =~ s/.*://; # strip fully-qualified portion, just want function
- # skip all-caps special Perl functions
+ # decline all-caps names - reserved for special Perl functions
+ # if Perl core didn't handle this name, we won't mess with it either
+ my ( $package, $filename, $line ) = caller;
if ( $name =~ /^[A-Z]+$/ ) {
- return;
+ throw_autoload_fail "reserved function $name declined "
+ ." - called by $package ($filename line $line)";
}
# check for function in caller package
# (WebFetch may hand an input module's object to an output module)
- my ( $package, $filename, $line ) = caller;
if ( $package->can( $name )) {
- my $retval = eval $package."::".$name."( \$self, \@_ )";
+ # make an alias of the sub
+ {
+ no strict 'refs';
+ *{__PACKAGE__."::".$name} = \&{$package."::".$name};
+ }
+ #my $retval = eval $package."::".$name."( \$self, \@_ )";
+ my $retval = eval { $self->$name( @_ ); };
if ( $@ ) {
my $e = Exception::Class->caught();
ref $e ? $e->rethrow
@@ -1706,14 +1719,6 @@
return $retval;
}
- # act as a read-only accessor
- # add write accessors when API can specify what's OK to write
- if ( exists $self->{$name}) {
- # define the sub for better efficiency next time
- eval "sub WebFetch::$name { return \$_[0]->{$name}; }";
- return $self->{$name};
- }
-
# if we got here, we failed
throw_autoload_fail "function $name not found - "
."called by $package ($filename line $line)";
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:57:22
|
Revision: 54
http://webfetch.svn.sourceforge.net/webfetch/?rev=54&view=rev
Author: ikluft
Date: 2009-09-07 01:56:55 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
fix tests - oops they were missing from the MANIFEST!
Modified Paths:
--------------
branches/v0.13/MANIFEST
branches/v0.13/t/00-load.t
branches/v0.13/t/pod-coverage.t
Modified: branches/v0.13/MANIFEST
===================================================================
--- branches/v0.13/MANIFEST 2009-09-07 01:55:58 UTC (rev 53)
+++ branches/v0.13/MANIFEST 2009-09-07 01:56:55 UTC (rev 54)
@@ -11,3 +11,8 @@
lib/WebFetch/Input/SiteNews.pm
lib/WebFetch/Output/Dump.pm
lib/WebFetch/Output/TT.pm
+lib/WebFetch/Output/TWiki.pm
+t/00-load.t
+t/boilerplate.t
+t/pod-coverage.t
+t/pod.t
Modified: branches/v0.13/t/00-load.t
===================================================================
--- branches/v0.13/t/00-load.t 2009-09-07 01:55:58 UTC (rev 53)
+++ branches/v0.13/t/00-load.t 2009-09-07 01:56:55 UTC (rev 54)
@@ -1,6 +1,6 @@
#!perl -T
-use Test::More tests => 7;
+use Test::More tests => 8;
BEGIN {
use_ok( 'WebFetch' );
@@ -10,6 +10,7 @@
use_ok( 'WebFetch::Input::SiteNews' );
use_ok( 'WebFetch::Output::Dump' );
use_ok( 'WebFetch::Output::TT' );
+ use_ok( 'WebFetch::Output::TWiki' );
}
diag( "Testing WebFetch $WebFetch::VERSION, Perl $], $^X" );
Modified: branches/v0.13/t/pod-coverage.t
===================================================================
--- branches/v0.13/t/pod-coverage.t 2009-09-07 01:55:58 UTC (rev 53)
+++ branches/v0.13/t/pod-coverage.t 2009-09-07 01:56:55 UTC (rev 54)
@@ -1,6 +1,6 @@
use strict;
use warnings;
-use Test::More skip_all => "to-do";
+use Test::More;
# Ensure a recent version of Test::Pod::Coverage
my $min_tpc = 1.08;
@@ -15,4 +15,27 @@
plan skip_all => "Pod::Coverage $min_pc required for testing POD coverage"
if $@;
-all_pod_coverage_ok();
+# test plan
+# list in each class the functions considered private
+# test Pod coverage on everything else
+my %test_plan = (
+ "WebFetch" => [ qw( new debug fetch_main2 module_select
+ singular_handler init_fname2fnum init_wk2fnum ) ],
+ "WebFetch::Output::TT" => [ qw( new fetch ) ],
+ "WebFetch::Output::Dump" => [ qw( new fetch ) ],
+ "WebFetch::Output::TWiki" => [ qw( new fetch ) ],
+ "WebFetch::Input::RSS" => [ qw( new fetch extract_value parse_input
+ parse_rss printstamp ) ],
+ "WebFetch::Input::Atom" => [ qw( new fetch extract_value
+ parse_input ) ],
+ "WebFetch::Input::PerlStruct" => [ qw( new fetch ) ],
+ "WebFetch::Input::SiteNews" => [ qw( new fetch attr_state expired
+ parse_input printstamp priority text_state initial_state ) ],
+);
+plan tests => scalar keys %test_plan;
+
+my $mod;
+foreach $mod ( sort keys %test_plan ) {
+ my $regex = '^('.join( '|', @{$test_plan{$mod}} ).')$';
+ pod_coverage_ok( $mod, { also_private => [qr/$regex/]});
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:56:14
|
Revision: 53
http://webfetch.svn.sourceforge.net/webfetch/?rev=53&view=rev
Author: ikluft
Date: 2009-09-07 01:55:58 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
docs update
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/Atom.pm
branches/v0.13/lib/WebFetch/Input/RSS.pm
branches/v0.13/lib/WebFetch/Output/Dump.pm
branches/v0.13/lib/WebFetch/Output/TWiki.pm
branches/v0.13/lib/WebFetch.pm
Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-07 01:55:06 UTC (rev 52)
+++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-07 01:55:58 UTC (rev 53)
@@ -19,12 +19,56 @@
use Exception::Class (
);
+=head1 NAME
+
+WebFetch::Input::Atom - WebFetch input from Atom feeds
+
+=head1 SYNOPSIS
+
+This is an input module for WebFetch which accesses an Atom feed.
+The --source parameter contains the URL of the feed.
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
+ --source atom-feed-url [...WebFetch output options...]>
+
+In perl scripts:
+
+ use WebFetch::Input::Atom;
+
+ my $obj = WebFetch->new(
+ "dir" => "/path/to/fetch/workspace",
+ "source" => "http://search.twitter.com/search.atom?q=%23twiki",
+ "source_format" => "atom",
+ "dest" => "dump",
+ "dest_format" = "/path/to/dump/file",
+ );
+ $obj->do_actions; # process output
+ $obj->save; # save results
+
+
+=head1 DESCRIPTION
+
+This module gets the current headlines from a site-local file.
+
+The I<--input> parameter specifies a file name which contains news to be
+posted. See L<"FILE FORMAT"> below for details on contents to put in the
+file. I<--input> may be specified more than once, allowing a single news
+output to come from more than one input. For example, one file could be
+manually maintained in CVS or RCS and another could be entered from a
+web form.
+
+After this runs, the file C<site_news.html> will be created or replaced.
+If there already was a C<site_news.html> file, it will be moved to
+C<Osite_news.html>.
+
+=cut
+
+
our @Options = ();
our $Usage = "";
-# configuration parameters
-our $num_links = 5;
-
# no user-servicable parts beyond this point
# register capabilities with WebFetch
@@ -35,15 +79,6 @@
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- if ( !defined $self->{num_links}) {
- $self->{num_links} = $WebFetch::Input::Atom::num_links;
- }
- if ( !defined $self->{style}) {
- $self->{style} = {};
- $self->{style}{para} = 1;
- }
-
# set up Webfetch Embedding API data
$self->{data} = {};
$self->{data}{fields} = [ "id", "updated", "title", "author", "link",
@@ -118,36 +153,6 @@
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::Atom - download and save an Atom feed
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::Atom;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
- --source atom-feed-url [...WebFetch output options...]>
-
-=head1 DESCRIPTION
-
-This module gets the current headlines from a site-local file.
-
-The I<--input> parameter specifies a file name which contains news to be
-posted. See L<"FILE FORMAT"> below for details on contents to put in the
-file. I<--input> may be specified more than once, allowing a single news
-output to come from more than one input. For example, one file could be
-manually maintained in CVS or RCS and another could be entered from a
-web form.
-
-After this runs, the file C<site_news.html> will be created or replaced.
-If there already was a C<site_news.html> file, it will be moved to
-C<Osite_news.html>.
-
=head1 Atom FORMAT
Atom is an XML format defined at http://atompub.org/rfc4287.html
Modified: branches/v0.13/lib/WebFetch/Input/RSS.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-07 01:55:06 UTC (rev 52)
+++ branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-07 01:55:58 UTC (rev 53)
@@ -19,6 +19,12 @@
use Exception::Class (
);
+=head1 NAME
+
+WebFetch::Input::RSS - download and save an RSS feed
+
+=cut
+
our @Options = ();
our $Usage = "";
@@ -29,6 +35,19 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "input:rss" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Input::RSS;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::RSS -e "&fetch_main" -- --dir directory
+ --source rss-feed-url [...WebFetch output options...]>
+
+=cut
+
# called from WebFetch main routine
sub fetch
{
@@ -150,42 +169,10 @@
}
}
-#---------------------------------------------------------------------------
-
-#
-# utility functions
-#
-
-# generate a printable version of the datestamp
-sub printstamp
-{
- my ( $stamp ) = @_;
- my ( $year, $mon, $day ) = ( $stamp =~ /^(....)(..)(..)/ );
-
- return Month_to_Text(int($mon))." ".int($day).", $year";
-}
-
-#---------------------------------------------------------------------------
-
1;
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::RSS - download and save an RSS feed
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::RSS;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::RSS -e "&fetch_main" -- --dir directory
- --source rss-feed-url [...WebFetch output options...]>
-
=head1 DESCRIPTION
This module gets the current headlines from a site-local file.
Modified: branches/v0.13/lib/WebFetch/Output/Dump.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/Dump.pm 2009-09-07 01:55:06 UTC (rev 52)
+++ branches/v0.13/lib/WebFetch/Output/Dump.pm 2009-09-07 01:55:58 UTC (rev 53)
@@ -20,7 +20,12 @@
use Exception::Class (
);
+=head1 NAME
+WebFetch::Output::Dump - save data in a Perl structure dump
+
+=cut
+
# set defaults
our ( @url, $cat_priorities, $now, $nowstamp );
@@ -35,23 +40,6 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "output:dump" );
-# Perl structure dump format handler
-sub fmt_handler_dump
-{
- my ( $self, $filename ) = @_;
-
- $self->raw_savable( $filename, Dumper( $self->{data}));
- 1;
-}
-
-1;
-__END__
-# POD docs follow
-
-=head1 NAME
-
-WebFetch::Output::Dump - save data in a Perl structure dump
-
=head1 SYNOPSIS
In perl scripts:
@@ -65,10 +53,30 @@
=head1 DESCRIPTION
-This module gets the current news headlines from a site-local file.
+This is an output module for WebFetch which simply outputs a Perl
+structure dump from C<Data::Dumper>. It can be read again by a Perl
+script using C<eval>.
-TODO: add description
+=item $obj->fmt_handler_dump( $filename )
+This function dumps the data into a string for saving by the WebFetch::save()
+function.
+
+=cut
+
+# Perl structure dump format handler
+sub fmt_handler_dump
+{
+ my ( $self, $filename ) = @_;
+
+ $self->raw_savable( $filename, Dumper( $self->{data}));
+ 1;
+}
+
+1;
+__END__
+# POD docs follow
+
=head1 AUTHOR
WebFetch was written by Ian Kluft
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-07 01:55:06 UTC (rev 52)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-07 01:55:58 UTC (rev 53)
@@ -54,7 +54,7 @@
=head1 NAME
-WebFetch::Output::TWiki - The great new WebFetch::Output::TWiki!
+WebFetch::Output::TWiki - WebFetch output to TWiki web site
=cut
@@ -72,19 +72,32 @@
=head1 SYNOPSIS
-Quick summary of what the module does.
+This is an output module for WebFetch which places the data in pages
+on a TWiki web site. Some of its configuration information is read from
+a TWiki page. Calling or command-line parameters point to the TWiki page
+which has the configuration and a search key to locate the correct line
+in a table.
-Perhaps a little code snippet.
+From the command line...
- use WebFetch::Output::TWiki;
+ perl -w -I$libdir -MWebFetch::Input::Atom -MWebFetch::Output::TWiki -e "&fetch_main" -- --dir "/path/to/fetch/worskspace" --source "http://search.twitter.com/search.atom?q=%23twiki" --dest=twiki --twiki_root=/var/www/twiki --config_topic=Feeds.WebFetchConfig --config_key=twiki
- my $foo = WebFetch::Output::TWiki->new();
- ...
+From Perl code...
-=head1 EXPORT
+ use WebFetch;
-A list of functions that can be exported. You can delete this section
-if you don't export anything, such as for a purely object-oriented module.
+ my $obj = WebFetch->new(
+ "dir" => "/path/to/fetch/workspace",
+ "source" => "http://search.twitter.com/search.atom?q=%23twiki",
+ "source_format" => "atom",
+ "dest" => "twiki",
+ "dest_format" = "twiki",
+ "twiki_root" => "/var/www/twiki",
+ "config_topic" => "Feeds.WebFetchConfig",
+ "config_key" => "twiki",
+ );
+ $obj->do_actions; # process output
+ $obj->save; # save results
=head1 FUNCTIONS
@@ -348,10 +361,24 @@
1;
}
+=head1 TWiki software
+TWiki is a wiki (user-editable web site) with features enabling
+collaboration in an enterprise environment.
+It implements the concept of a "structured wiki", allowing structure
+and automation as needed and retaining the informality of a wiki.
+Automated input/updates such as from WebFetch::Output::TWiki is one example.
+
+See http://twiki.org/ for the Open Source community-maintained software
+or http://twiki.net/ for enterprise support.
+
+WebFetch::Output::TWiki was developed for TWiki Inc (formerly TWiki.Net).
+
=head1 AUTHOR
-Ian Kluft, C<< <ikluft at cpan.org> >>
+WebFetch was written by Ian Kluft
+Send patches, bug reports, suggestions and questions to
+C<ma...@we...>.
=head1 BUGS
@@ -359,50 +386,17 @@
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WebFetch-Output-TWiki>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
+=head1 SEE ALSO
+=for html
+<a href="WebFetch.html">WebFetch</a>
+=for text
+WebFetch
-=head1 SUPPORT
+=for man
+WebFetch
-You can find documentation for this module with the perldoc command.
-
- perldoc WebFetch::Output::TWiki
-
-
-You can also look for information at:
-
-=over 4
-
-=item * RT: CPAN's request tracker
-
-L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WebFetch-Output-TWiki>
-
-=item * AnnoCPAN: Annotated CPAN documentation
-
-L<http://annocpan.org/dist/WebFetch-Output-TWiki>
-
-=item * CPAN Ratings
-
-L<http://cpanratings.perl.org/d/WebFetch-Output-TWiki>
-
-=item * Search CPAN
-
-L<http://search.cpan.org/dist/WebFetch-Output-TWiki/>
-
-=back
-
-
-=head1 ACKNOWLEDGEMENTS
-
-
-=head1 COPYRIGHT & LICENSE
-
-Copyright 2009 Ian Kluft, all rights reserved.
-
-This program is free software; you can redistribute it and/or modify it
-under the same terms as Perl itself.
-
-
=cut
1; # End of WebFetch::Output::TWiki
Modified: branches/v0.13/lib/WebFetch.pm
===================================================================
--- branches/v0.13/lib/WebFetch.pm 2009-09-07 01:55:06 UTC (rev 52)
+++ branches/v0.13/lib/WebFetch.pm 2009-09-07 01:55:58 UTC (rev 53)
@@ -375,10 +375,7 @@
# This eliminates the need for the sub-packages to export their own
# fetch_main(), which users found conflicted with each other when
# loading more than one WebFetch-derived module.
-=head2 eval_wrapper ( $code, $throw_func, [ name => value, ...] )
-=cut
-
# fetch_main - eval wrapper for fetch_main2 to catch and display errors
sub main::fetch_main
{
@@ -534,12 +531,18 @@
}
}
-=item Do not use the new() function directly from WebFetch.
+=item $obj = WebFetch::new( param => "value", [...] )
-I<Use the C<new> function from a derived class>, not directly from WebFetch.
-The WebFetch module itself is just infrastructure for the other modules,
-and contains none of the details needed to complete any specific fetches.
+Generally, the new function should be inherited and used from a derived
+class. However, WebFetch provides an AUTOLOAD function which will catch
+wayward function calls from a subclass, and redirect it to the appropriate
+function in the calling class, if it exists.
+The AUTOLOAD feature is needed because, for example, when an object is
+instantiated in a WebFetch::Input::* class, it will later be passed to
+a WebFetch::Output::* class, whose data method functions can be accessed
+this way as if the WebFetch object had become a member of that class.
+
=cut
# allocate a new object
@@ -564,7 +567,8 @@
=item $obj->init( ... )
-This is called from the C<new> function of all WebFetch modules.
+This is called from the C<new> function that modules inherit from WebFetch.
+If subclasses override it, they should still call it before completion.
It takes "name" => "value" pairs which are all placed verbatim as
attributes in C<$obj>.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:55:12
|
Revision: 52
http://webfetch.svn.sourceforge.net/webfetch/?rev=52&view=rev
Author: ikluft
Date: 2009-09-07 01:55:06 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
remove dead code, modernize, docs
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/SiteNews.pm
Modified: branches/v0.13/lib/WebFetch/Input/SiteNews.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-07 01:54:18 UTC (rev 51)
+++ branches/v0.13/lib/WebFetch/Input/SiteNews.pm 2009-09-07 01:55:06 UTC (rev 52)
@@ -13,6 +13,12 @@
use Carp;
use Date::Calc qw(Today Delta_Days Month_to_Text);
+=head1 NAME
+
+WebFetch::Input::SiteNews - download and save SiteNews headlines
+
+=cut
+
# set defaults
our ( $cat_priorities, $now, $nowstamp );
@@ -30,6 +36,35 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "cmdline", "input:sitenews" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Input::SiteNews;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory
+ --source news-file --short short-form-output-file
+ --long long-form-output-file>
+
+=head1 DESCRIPTION
+
+This module gets the current headlines from a site-local file.
+
+The I<--source> parameter specifies a file name which contains news to be
+posted. See L<"FILE FORMAT"> below for details on contents to put in the
+file. I<--source> may be specified more than once, allowing a single news
+output to come from more than one input. For example, one file could be
+manually maintained in CVS or RCS and another could be entered from a
+web form.
+
+After this runs, the file C<site_news.html> will be created or replaced.
+If there already was a C<site_news.html> file, it will be moved to
+C<Osite_news.html>.
+
+=cut
+
# constants for state names
sub initial_state { 0; }
sub attr_state { 1; }
@@ -236,75 +271,6 @@
}
}
-# format handler function specific to this module's long-news output format
-sub fmt_handler_sitenews_long
-{
- my ( $self, $filename ) = @_;
-
- # sort events for long display
- my @long_news = sort {
- # sort news entries for long display
- # sorting priority:
- # date first
- # category/priority second
- # reverse file order last
-
- # sort by date
- my $lbl_fnum = $self->fname2fnum("label");
- my ( $a_date, $b_date) = ( $a->[$lbl_fnum], $b->[$lbl_fnum]);
- $a_date =~ s/-.*//;
- $b_date =~ s/-.*//;
- if ( $a_date ne $b_date ) {
- return $b_date cmp $a_date;
- }
-
- # sort by priority (within same date)
- my $pri_fnum = $self->fname2fnum("priority");
- if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) {
- return $a->[$pri_fnum] <=> $b->[$pri_fnum];
- }
-
- # sort by chronological order (within same date and priority)
- return $a->[$lbl_fnum] cmp $b->[$lbl_fnum];
- } @{$self->{data}{records}};
-
- # process the links for the long list
- my ( @long_text, $prev, $url_prefix, $i );
- $url_prefix = ( defined $self->{url_prefix})
- ? $self->{url_prefix}
- : "";
- $prev=undef;
- push @long_text, "<dl>";
- my $lbl_fnum = $self->fname2fnum("label");
- my $date_fnum = $self->fname2fnum("date");
- my $title_fnum = $self->fname2fnum("title");
- my $txt_fnum = $self->fname2fnum("text");
- my $exp_fnum = $self->fname2fnum("expired");
- my $pri_fnum = $self->fname2fnum("priority");
- for ( $i = 0; $i <= $#long_news; $i++ ) {
- my $news = $long_news[$i];
- if (( ! defined $prev->[$date_fnum]) or
- $prev->[$date_fnum] ne $news->[$date_fnum])
- {
- push @long_text, "<dt>".$news->[$date_fnum];
- push @long_text, "<dd>";
- }
- push @long_text, "<a name=\"".$news->[$lbl_fnum]."\">"
- .$news->[$txt_fnum]."</a>\n"
- ."<!--- priority: ".$news->[$pri_fnum]
- .($news->[$exp_fnum] ? " expired" : "")
- ." --->";
- push @long_text, "<p>";
- $prev = $news;
- }
- push @long_text, "</dl>";
-
- # store it for later save to disk
- $self->html_savable( $self->{long_path}, join("\n",@long_text)."\n" );
-}
-
-#---------------------------------------------------------------------------
-
#
# utility functions
#
@@ -351,43 +317,10 @@
}
}
-#---------------------------------------------------------------------------
-
1;
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Input::SiteNews - download and save SiteNews headlines
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Input::SiteNews;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory
- --source news-file --short short-form-output-file
- --long long-form-output-file>
-
-=head1 DESCRIPTION
-
-This module gets the current headlines from a site-local file.
-
-The I<--source> parameter specifies a file name which contains news to be
-posted. See L<"FILE FORMAT"> below for details on contents to put in the
-file. I<--source> may be specified more than once, allowing a single news
-output to come from more than one input. For example, one file could be
-manually maintained in CVS or RCS and another could be entered from a
-web form.
-
-After this runs, the file C<site_news.html> will be created or replaced.
-If there already was a C<site_news.html> file, it will be moved to
-C<Osite_news.html>.
-
=head1 FILE FORMAT
The WebFetch::Input::SiteNews data format is used to set up news for the
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:54:29
|
Revision: 51
http://webfetch.svn.sourceforge.net/webfetch/?rev=51&view=rev
Author: ikluft
Date: 2009-09-07 01:54:18 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
remove dead code, modernize, docs
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Output/TT.pm
Modified: branches/v0.13/lib/WebFetch/Output/TT.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-07 01:48:50 UTC (rev 50)
+++ branches/v0.13/lib/WebFetch/Output/TT.pm 2009-09-07 01:54:18 UTC (rev 51)
@@ -23,7 +23,12 @@
);
+=head1 NAME
+WebFetch::Output::TT - save data via the Perl Template Toolkit
+
+=cut
+
# set defaults
our @Options = ( "template=s", "tt_include:s" );
@@ -34,6 +39,29 @@
# register capabilities with WebFetch
__PACKAGE__->module_register( "cmdline", "output:tt" );
+=head1 SYNOPSIS
+
+In perl scripts:
+
+C<use WebFetch::Output::TT;>
+
+From the command line:
+
+C<perl -w -MWebFetch::Output::TT -e "&fetch_main" --
+ [...WebFetch input options...] --dir directory
+ --dest_format tt --dest dest-path --template tt-file >
+
+=head1 DESCRIPTION
+
+This module saves output via the Perl Template Toolkit.
+
+=item $obj->fmt_handler_tt( $filename )
+
+This function formats the data according to the Perl Template Toolkit
+template provided in the --template parameter.
+
+=cut
+
# Perl Template Toolkit format handler
sub fmt_handler_tt
{
@@ -52,11 +80,9 @@
my $template = Template->new( \%tt_config );
# process template
- my $result = $template->process( $self->{template}, $self->{data},
- \$output );
+ $template->process( $self->{template}, $self->{data}, \$output )
+ or throw_template $template->error();
- $result or throw_template ( $template->error());
-
$self->raw_savable( $filename, $output );
1;
}
@@ -65,27 +91,6 @@
__END__
# POD docs follow
-=head1 NAME
-
-WebFetch::Output::TT - save data via the Perl Template Toolkit
-
-=head1 SYNOPSIS
-
-In perl scripts:
-
-C<use WebFetch::Output::TT;>
-
-From the command line:
-
-C<perl -w -MWebFetch::Output::TT -e "&fetch_main" -- --dir directory
- --dest_format tt --dest dest-path [...WebFetch output options...]>
-
-=head1 DESCRIPTION
-
-This module saves output via the Perl Template Toolkit.
-
-TODO: add description
-
=head1 AUTHOR
WebFetch was written by Ian Kluft
@@ -95,12 +100,13 @@
=head1 SEE ALSO
=for html
-<a href="WebFetch.html">WebFetch</a>
+<a href="WebFetch.html">WebFetch</a>,
+<a href="http://www.template-toolkit.org/>Perl Template Toolkit</a>
=for text
-WebFetch
+WebFetch, Perl Template Toolkit
=for man
-WebFetch
+WebFetch, Perl Template Toolkit
=cut
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:49:01
|
Revision: 50
http://webfetch.svn.sourceforge.net/webfetch/?rev=50&view=rev
Author: ikluft
Date: 2009-09-07 01:48:50 +0000 (Mon, 07 Sep 2009)
Log Message:
-----------
remove dead code, modernize, docs
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
Modified: branches/v0.13/lib/WebFetch/Input/PerlStruct.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-06 23:04:36 UTC (rev 49)
+++ branches/v0.13/lib/WebFetch/Input/PerlStruct.pm 2009-09-07 01:48:50 UTC (rev 50)
@@ -1,4 +1,3 @@
-#
# WebFetch::Input::PerlStruct.pm
# push a Perl structure with pre-parsed news into WebFetch
#
@@ -11,15 +10,26 @@
use strict;
use base "WebFetch";
-use Carp;
+# define exceptions/errors
+use Exception::Class (
+ "WebFetch::Input::PerlStruct::Exception::NoStruct" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_nostruct",
+ description => "no 'content' structure was provided",
+ },
-our $format;
-our @Options = ( "format:s" );
+ "WebFetch::Input::PerlStruct::Exception::BadStruct" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_badstruct",
+ description => "content of 'content' was not recognizable",
+ },
+
+);
+
+our @Options = ( );
our $Usage = "";
# configuration parameters
-our $num_links = 5;
-our $default_format = "<a href=\"%url%\">%title%</a>";
# no user-servicable parts beyond this point
@@ -30,40 +40,51 @@
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- $self->{num_links} = $WebFetch::Input::PerlStruct::num_links;
- if ( defined $format ) {
- $self->{"format"} = $format;
- } else {
- $self->{"format"} = $WebFetch::Input::PerlStruct::default_format;
- }
-
# get the content from the provided perl structure
if ( !defined $self->{content}) {
- croak "WebFetch::Input::PerlStruct: content struct does not exist\n";
+ throw_nostruct "content struct does not exist";
}
- if ( ref($self->{content}) != "ARRAY" ) {
- croak "WebFetch::Input::PerlStruct: content is not an ARRAY ref\n";
- }
-
- # collate $self->{content} into @content_links by fields from format
- my ( @content_links, $part );
- my @fields = ( $self->{"format"} =~ /%([^%]*)%/go );
- foreach $part ( @{$self->{content}} ) {
- my ( $fname, $subparts );
- $subparts= [];
- foreach $fname ( @fields ) {
- push @$subparts, "".((defined $part->{$fname})
- ? $part->{$fname} : "" );
+ if ( ref($self->{content}) eq "HASH" ) {
+ if (( exists $self->{content}{fields})
+ and ( exists $self->{content}{records})
+ and ( exists $self->{content}{wk_names}))
+ {
+ # it's already in WebFetch Embedding data format
+ $self->{data} = $self->{content};
+ return;
}
- push ( @content_links, $subparts );
}
+ if ( ref($self->{content}) ne "ARRAY" ) {
+ throw_badstruct "content is not an ARRAY ref "
+ ."or WebFetch Embedding API data";
+ }
# build data structure
$self->{data} = {};
+ $self->{data}{fields} = [ sort keys %{$self->{content}[0]}];
+ $self->{data}{records} = [];
+ my ( $entry, $field );
+ foreach $entry ( @{$self->{content}}) {
+ my @new_entry;
+ foreach $field ( @{$self->{data}{fields}}) {
+ push @new_entry, ( exists $entry->{$field})
+ ? $entry->{$field} : "";
+ }
+ push @{$self->{data}{records}}, \@new_entry;
+ }
+
+ # map the well-known field names
+ if ( defined $self->{wk_names}) {
+ $self->{data}{wk_names} = $self->{wk_names};
+ } else {
+ # if not provided, fake it the best we can
+ $self->{data}{wk_names} = {};
+ foreach $field ( @{$self->{data}{fields}}) {
+ $self->{data}{wk_names}{$field} = $field;
+ }
+ }
}
-
1;
__END__
# POD docs follow
@@ -81,11 +102,8 @@
C<$obj = new WebFetch::Input::PerlStruct (
"content" => content_struct,
"dir" => output_dir,
- "file" => output_file,
- [ "format" => format_string, ]
- [ "export" => wf_export_filename, ]
- [ "font_size" => font_size, ]
- [ "font_face" => font_face, ]
+ "dest" => output_file,
+ "dest_format" => output_format, # used to select WebFetch output module
[ "group" => file_group_id, ]
[ "mode" => file_mode_perms, ]
[ "quiet" => 1 ]);>
@@ -116,55 +134,26 @@
=head1 THE CONTENT STRUCTURE
-The $content_struct parameter must be a reference to an array of hashes.
+The $content_struct parameter may be in either of two formats.
+
+If $content_struct is a hash reference containing entries called
+"fields", "wk_names" and "records", then it is assumed to be already
+in the format of the "data" element of the WebFetch Embedding API.
+
+Otherwise, it must be a reference to an array of hashes.
Each of the hashes represents a separate news item,
in the order they should be displayed.
-The fields of each has entry must provide enough information to
-match field names in all the the output formats you're using.
-Output formats include the following:
-=over 4
+The field names should be consistent through all records.
+WebFetch uses the field names from the first record and assumes the
+remainder are identical.
-=item HTML output file
-
-All the fields used in the $format_string (see below) must be present
-for generation of the HTML output.
-
-=item WebFetch export
-
-The $format_string also determines the fields that will be used
-for WebFetch export.
-Note that the WebFetch::General module expects by default to find
-fields called "url" and "title".
-So if you use something different from the default,
-you must provide your format string in the instructions
-for sites that fetch news from you.
-(Otherwise their WebFetch::General won't be looking for the fields
-you're providing.)
-
-=item MyNetscape export
-
-The MyNetscape export function expects to find fields called
-"title" and "url", and will skip any hash entry which is
-missing either of them.
-
-=back
-
-=head1 FORMAT STRINGS
-
-WebFetch::Input::PerlStruct uses a format string identical to WebFetch::General.
-The default format for retrieved data is
-
-<a href="%url%">%title%</a>
-
-See the WebFetch::General documentation for more details.
-
The names of the fields are chosen by the calling function.
-Though for the convenience of the user,
-the author of an exporting module should keep in mind the
-default WebFetch::Input::PerlStruct format uses fields called "url" and "title".
-If you use fields by different names, make sure your code provides those
-fields in the $content_struct parameter.
+If an array called "wk_names" is provided then it used to map
+well-known field names of the WebFetch Embedding API to field names in
+this data.
+Otherwise, meaning can only be applied to field names if they already
+match WebFetch's well-known field names.
=head1 AUTHOR
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-07 01:13:29
|
Revision: 49
http://webfetch.svn.sourceforge.net/webfetch/?rev=49&view=rev
Author: ikluft
Date: 2009-09-06 23:04:36 +0000 (Sun, 06 Sep 2009)
Log Message:
-----------
remove dead code
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/RSS.pm
Modified: branches/v0.13/lib/WebFetch/Input/RSS.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-05 09:55:18 UTC (rev 48)
+++ branches/v0.13/lib/WebFetch/Input/RSS.pm 2009-09-06 23:04:36 UTC (rev 49)
@@ -23,7 +23,6 @@
our $Usage = "";
# configuration parameters
-our $num_links = 5;
# no user-servicable parts beyond this point
@@ -35,15 +34,6 @@
{
my ( $self ) = @_;
- # set parameters for WebFetch routines
- if ( !defined $self->{num_links}) {
- $self->{num_links} = $WebFetch::Input::RSS::num_links;
- }
- if ( !defined $self->{style}) {
- $self->{style} = {};
- $self->{style}{para} = 1;
- }
-
# set up Webfetch Embedding API data
$self->{data} = {};
$self->{data}{fields} = [ "pubDate", "title", "link", "category",
@@ -58,8 +48,6 @@
};
$self->{data}{records} = [];
- # process the links
-
# parse data file
$self->parse_input();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-05 09:55:25
|
Revision: 48
http://webfetch.svn.sourceforge.net/webfetch/?rev=48&view=rev
Author: ikluft
Date: 2009-09-05 09:55:18 +0000 (Sat, 05 Sep 2009)
Log Message:
-----------
no longer needs its own version number
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Output/TWiki.pm
Modified: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-05 09:36:27 UTC (rev 47)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-05 09:55:18 UTC (rev 48)
@@ -56,10 +56,6 @@
WebFetch::Output::TWiki - The great new WebFetch::Output::TWiki!
-=head1 VERSION
-
-Version 0.01
-
=cut
# globals/defaults
@@ -71,8 +67,6 @@
# no user-servicable parts beyond this point
-our $VERSION = '0.01';
-
# register capabilities with WebFetch
__PACKAGE__->module_register( "cmdline", "output:twiki" );
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-05 09:36:35
|
Revision: 47
http://webfetch.svn.sourceforge.net/webfetch/?rev=47&view=rev
Author: ikluft
Date: 2009-09-05 09:36:27 +0000 (Sat, 05 Sep 2009)
Log Message:
-----------
add id field
Modified Paths:
--------------
branches/v0.13/lib/WebFetch/Input/Atom.pm
Modified: branches/v0.13/lib/WebFetch/Input/Atom.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-05 09:35:33 UTC (rev 46)
+++ branches/v0.13/lib/WebFetch/Input/Atom.pm 2009-09-05 09:36:27 UTC (rev 47)
@@ -50,6 +50,7 @@
"summary", "content", "xml" ];
# defined which fields match to which "well-known field names"
$self->{data}{wk_names} = {
+ "id" => "id",
"title" => "title",
"url" => "link",
"date" => "updated",
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <ik...@us...> - 2009-09-05 09:35:44
|
Revision: 46
http://webfetch.svn.sourceforge.net/webfetch/?rev=46&view=rev
Author: ikluft
Date: 2009-09-05 09:35:33 +0000 (Sat, 05 Sep 2009)
Log Message:
-----------
new module WebFetch::Output::TWiki
Added Paths:
-----------
branches/v0.13/lib/WebFetch/Output/TWiki.pm
Added: branches/v0.13/lib/WebFetch/Output/TWiki.pm
===================================================================
--- branches/v0.13/lib/WebFetch/Output/TWiki.pm (rev 0)
+++ branches/v0.13/lib/WebFetch/Output/TWiki.pm 2009-09-05 09:35:33 UTC (rev 46)
@@ -0,0 +1,414 @@
+#
+# WebFetch::Output::TWiki - save data into a TWiki web site
+#
+# Copyright (c) 2009 Ian Kluft. This program is free software; you can
+# redistribute it and/or modify it under the terms of the GNU General Public
+# License Version 3. See http://www.webfetch.org/GPLv3.txt
+
+package WebFetch::Output::TWiki;
+
+use warnings;
+use strict;
+use WebFetch;
+use base "WebFetch";
+use DB_File;
+
+# define exceptions/errors
+use Exception::Class (
+ "WebFetch::Output::TWiki::Exception::NoRoot" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_no_root",
+ description => "WebFetch::Output::TWiki needs to be provided "
+ ."a twiki_root parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::NotFound" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_not_found",
+ description => "the directory in the twiki_root parameter "
+ ."doesn't exist or doesn't have a lib subdirectory",
+ },
+ "WebFetch::Output::TWiki::Exception::Require" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_require",
+ description => "failed to import TWiki or TWiki::Func modules",
+ },
+ "WebFetch::Output::TWiki::Exception::NoConfig" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_no_config",
+ description => "WebFetch::Output::TWiki needs to be provided "
+ ."a config_topic parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::ConfigMissing" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_config_missing",
+ description => "WebFetch::Output::TWiki is missing a required "
+ ."configuration parameter",
+ },
+ "WebFetch::Output::TWiki::Exception::Oops" => {
+ isa => "WebFetch::Exception",
+ alias => "throw_twiki_oops",
+ description => "WebFetch::Output::TWiki returned errors from "
+ ."saving one or more entries",
+ },
+);
+
+=head1 NAME
+
+WebFetch::Output::TWiki - The great new WebFetch::Output::TWiki!
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+# globals/defaults
+our @Options = ( "twiki_root=s", "config_topic=s", "config_key=s" );
+our $Usage = "--twiki_root path-to-twiki --config_topic web.topic "
+ ."--config_key keyword";
+our @default_field_names = ( qw( key web parent prefix template form
+ options ));
+
+# no user-servicable parts beyond this point
+
+our $VERSION = '0.01';
+
+# register capabilities with WebFetch
+__PACKAGE__->module_register( "cmdline", "output:twiki" );
+
+=head1 SYNOPSIS
+
+Quick summary of what the module does.
+
+Perhaps a little code snippet.
+
+ use WebFetch::Output::TWiki;
+
+ my $foo = WebFetch::Output::TWiki->new();
+ ...
+
+=head1 EXPORT
+
+A list of functions that can be exported. You can delete this section
+if you don't export anything, such as for a purely object-oriented module.
+
+=head1 FUNCTIONS
+
+=head2 get_twiki_config
+
+=cut
+
+# read the TWiki configuation
+sub get_twiki_config
+{
+ my $self = shift;
+ WebFetch::debug "in get_twiki_config";
+
+ # find the TWiki modules
+ if ( ! exists $self->{twiki_root}) {
+ throw_twiki_no_root( "TWiki root directory not defined" );
+ }
+ if (( ! -d $self->{twiki_root}) or ( ! -d $self->{twiki_root}."/lib" ))
+ {
+ throw_twiki_not_found( "can't find TWiki root or lib at "
+ .$self->{twiki_root});
+ }
+
+ # load the TWiki modules
+ WebFetch::debug "loading TWiki modules";
+ push @INC, $self->{twiki_root}."/lib";
+ eval { require TWiki; require TWiki::Func; };
+ if ( $@ ) {
+ throw_twiki_require ( $@ );
+ }
+
+ # initiate TWiki library, create session as user "WebFetch"
+ $self->{twiki_obj} = TWiki->new( "WebFetch" );
+
+ # get the contents of the TWiki topic which contains our configuration
+ if ( !exists $self->{config_topic}) {
+ throw_twiki_no_config( "TWiki configuration page for WebFetch "
+ ."not defined" );
+ }
+ my ( $web, $topic ) = split /\./, $self->{config_topic};
+ WebFetch::debug "config_topic: ".$self->{config_topic}
+ ." -> $web, $topic";
+ if (( ! defined $web ) or ( ! defined $topic )) {
+ throw_twiki_no_config( "TWiki configuration page for WebFetch "
+ ."must be defined in the format web.topic" );
+ }
+
+ # check if a config_key was specified before we read the configuration
+ if ( !exists $self->{config_key}) {
+ throw_twiki_no_config( "TWiki configuration key for WebFetch "
+ ."not defined" );
+ }
+
+ # read the configuration info
+ my $config = TWiki::Func::readTopic( $web, $topic );
+
+ # if STARTINCLUDE and STOPINCLUDE are present, use only what's between
+ if ( $config =~ /%STARTINCLUDE%\s*(.*)\s*%STOPINCLUDE%/s ) {
+ $config = $1;
+ }
+
+ # parse the configuration
+ WebFetch::debug "parsing configuration";
+ my ( @fnames, $line );
+ $self->{twiki_config_all} = [];
+ $self->{twiki_keys} = {};
+ foreach $line ( split /\r*\n+/s, $config ) {
+ if ( $line =~ /^\|\s*(.*)\s*\|\s*$/ ) {
+ my @entries = split /\s*\|\s*/, $1;
+
+ # check first line for field headings
+ if ( ! @{$self->{twiki_config_all}}) {
+ if ( $entries[0] =~ /\*\w+\*/ ) {
+ # save table headings as field names
+ my $field;
+ foreach $field ( @entries ) {
+ if ( $field =~ /\*(\w*)\*/ )
+ {
+ push @fnames, lc($1);
+ } else {
+ my $tmp = lc($field);
+ $tmp =~ s/\W//g;
+ push @fnames, $tmp;
+ }
+ }
+ next;
+ } else {
+ # use default field names
+ @fnames = @default_field_names;
+ }
+ }
+ WebFetch::debug "field names: ".join " ", @fnames;
+
+ # save the entries
+ # it isn't a heading row if we got here
+ # transfer array @entries to named fields in %config
+ WebFetch::debug "data row: ".join " ", @entries;
+ my ( $i, $key, %config );
+ for ( $i=0; $i < scalar @fnames; $i++ ) {
+ $config{ $fnames[$i]} = $entries[$i];
+ if ( $fnames[$i] eq "key" ) {
+ $key = $entries[$i];
+ }
+ }
+
+ # save the %config row in @{$self->{twiki_config_all}}
+ if (( defined $key )
+ and ( !exists $self->{twiki_keys}{$key}))
+ {
+ push @{$self->{twiki_config_all}}, \%config;
+ $self->{twiki_keys}{$key} = ( scalar
+ @{$self->{twiki_config_all}}) - 1;
+ }
+ }
+ }
+
+ # select the line which is for this request
+ if ( ! exists $self->{twiki_keys}{$self->{config_key}}) {
+ throw_twiki_no_config "no configuration found for key "
+ .$self->{config_key};
+ }
+ $self->{twiki_config} = $self->{twiki_config_all}[$self->{twiki_keys}{$self->{config_key}}];
+ WebFetch::debug "twiki_config: ".join( " ", %{$self->{twiki_config}});
+}
+
+=head2 write_to_twiki
+
+=cut
+
+sub write_to_twiki
+{
+ my $self = shift;
+ my ( $config, $name );
+
+ # get config variables
+ $config = $self->{twiki_config};
+ foreach $name ( qw( key web parent prefix template form )) {
+ if ( !exists $self->{twiki_config}{$name}) {
+ throw_twiki_config_missing( "missing config parameter "
+ .$name );
+ }
+ }
+
+ # get text of template topic
+ my $template = TWiki::Func::readTopic( $config->{web},
+ $config->{template});
+
+ # determine unique identifier field
+ my $id_field = $self->wk2fnum( "id" );
+ if ( ! defined $id_field ) {
+ $id_field = $self->wk2fnum( "url" );
+ } elsif ( ! defined $id_field ) {
+ $id_field = $self->wk2fnum( "title" );
+ }
+ WebFetch::debug "write_to_twiki: id_field=$id_field";
+
+ # open DB file for tracking unique IDs of articles already processed
+ my %id_index;
+ tie %id_index, 'DB_File',
+ $self->{dir}."/".$config->{key}."_id_index.db",
+ &DB_File::O_CREAT|&DB_File::O_RDWR, 0640;
+
+ # determine initial topic name
+ my ( %topics, @topics );
+ @topics = TWiki::Func::getTopicList( $config->{web});
+ foreach ( @topics ) {
+ $topics{$_} = 1;
+ }
+ my $tnum_counter = 0;
+ my $tnum_format = $config->{prefix}."-%07d";
+
+ # create topics with metadata from each WebFetch data record
+ my $entry;
+ my @oopses;
+ foreach $entry ( @{$self->{data}{records}}) {
+
+ # check that this entry hasn't already been forwarded to TWiki
+ if ( exists $id_index{$entry->[$id_field]}) {
+ next;
+ }
+ $id_index{$entry->[$id_field]} = time;
+
+ # select topic name
+ my $topicname = sprintf $tnum_format, $tnum_counter;
+ while ( exists $topics{$topicname}) {
+ $tnum_counter++;
+ $topicname = sprintf $tnum_format, $tnum_counter;
+ }
+ $tnum_counter++;
+ $topics{$topicname} = 1;
+ my $text = $template;
+ WebFetch::debug "write_to_twiki: writing $topicname";
+
+ # create topic metadata
+ my $meta = TWiki::Meta->new ( $self->{twiki_obj},
+ $config->{web}, $topicname );
+ $meta->put( "TOPICPARENT",
+ { name => $config->{parent}});
+ $meta->put( "FORM", { name => $config->{form}});
+ my $fnum;
+ for ( $fnum = 0; $fnum <= @{$self->{data}{fields}}; $fnum++ ) {
+ WebFetch::debug "meta: ".$self->{data}{fields}[$fnum]
+ ." = ".$entry->[$fnum];
+ ( defined $self->{data}{fields}[$fnum]) or next;
+ ( $self->{data}{fields}[$fnum] eq "xml") and next;
+ ( defined $entry->[$fnum]) or next;
+ WebFetch::debug "meta: OK";
+ $meta->putKeyed( "FIELD", {
+ name => $self->{data}{fields}[$fnum],
+ value => $entry->[$fnum] });
+ }
+
+ # save a special title field for TWiki indexes
+ my $index_title = $entry->[$self->wk2fnum( "title" )];
+ $index_title =~ s/[\t\r\n\|]+/ /gs;
+ $index_title =~ s/^\s*//;
+ $index_title =~ s/\s*$//;
+ if ( length($index_title) > 60 ) {
+ substr( $index_title, 56 ) = "...";
+ }
+ WebFetch::debug "title: $index_title";
+ $meta->putKeyed( "FIELD", {
+ name => "IndexTitle",
+ title => "Indexing title",
+ value => $index_title });
+
+ # save the topic
+ my $oopsurl = TWiki::Func::saveTopic( $config->{web},
+ $topicname, $meta, $text );
+ if ( $oopsurl ) {
+ WebFetch::debug "write_to_twiki: $topicname - $oopsurl";
+ push @oopses, $entry->[$self->wk2fnum( "title" ) or 0]
+ ." -> ".$topicname." ".$oopsurl;
+ }
+ }
+
+ # check for errors
+ if ( @oopses ) {
+ throw_twiki_oops( "These saves failed:\n".join "\n", @oopses );
+ }
+}
+
+=head2 fmt_handler_twiki
+
+=cut
+
+# TWiki format handler
+sub fmt_handler_twiki
+{
+ my $self = shift;
+ my $filename = shift;
+
+ # get configuration from TWiki
+ $self->get_twiki_config;
+
+ # write to TWiki topic
+ $self->write_to_twiki;
+
+ # no savables for WebFetch::save - mark it OK
+ $self->no_savables_ok;
+ 1;
+}
+
+
+=head1 AUTHOR
+
+Ian Kluft, C<< <ikluft at cpan.org> >>
+
+=head1 BUGS
+
+Please report any bugs or feature requests to C<bug-webfetch-output-twiki at rt.cpan.org>, or through
+the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WebFetch-Output-TWiki>. I will be notified, and then you'll
+automatically be notified of progress on your bug as I make changes.
+
+
+
+
+=head1 SUPPORT
+
+You can find documentation for this module with the perldoc command.
+
+ perldoc WebFetch::Output::TWiki
+
+
+You can also look for information at:
+
+=over 4
+
+=item * RT: CPAN's request tracker
+
+L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WebFetch-Output-TWiki>
+
+=item * AnnoCPAN: Annotated CPAN documentation
+
+L<http://annocpan.org/dist/WebFetch-Output-TWiki>
+
+=item * CPAN Ratings
+
+L<http://cpanratings.perl.org/d/WebFetch-Output-TWiki>
+
+=item * Search CPAN
+
+L<http://search.cpan.org/dist/WebFetch-Output-TWiki/>
+
+=back
+
+
+=head1 ACKNOWLEDGEMENTS
+
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2009 Ian Kluft, all rights reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+
+=cut
+
+1; # End of WebFetch::Output::TWiki
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|