From: <ik...@us...> - 2009-08-25 22:17:42
|
Revision: 33 http://webfetch.svn.sourceforge.net/webfetch/?rev=33&view=rev Author: ikluft Date: 2009-08-25 22:17:25 +0000 (Tue, 25 Aug 2009) Log Message: ----------- code modernization version bumped to 0.12 WebFetch.pm: eval wrapper for main changes to plugin API for automated selection of plugin modules changed command line processing replaced "fetch" capability string with "input" group Modified Paths: -------------- branches/v0.11/lib/WebFetch/Input/PerlStruct.pm branches/v0.11/lib/WebFetch/Input/RSS.pm branches/v0.11/lib/WebFetch/Input/SiteNews.pm branches/v0.11/lib/WebFetch/Output/Dump.pm branches/v0.11/lib/WebFetch.pm Modified: branches/v0.11/lib/WebFetch/Input/PerlStruct.pm =================================================================== --- branches/v0.11/lib/WebFetch/Input/PerlStruct.pm 2009-08-22 05:44:49 UTC (rev 32) +++ branches/v0.11/lib/WebFetch/Input/PerlStruct.pm 2009-08-25 22:17:25 UTC (rev 33) @@ -14,7 +14,7 @@ use Carp; our $format; -our @Options = ( "format:s" => \$format ); +our @Options = ( "format:s" ); our $Usage = ""; # configuration parameters @@ -23,8 +23,8 @@ # no user-servicable parts beyond this point -# register with WebFetch to provide "fetch" capability -__PACKAGE__->module_register( "fetch", "input:perlstruct" ); +# register capabilities with WebFetch +__PACKAGE__->module_register( "input:perlstruct" ); sub fetch { Modified: branches/v0.11/lib/WebFetch/Input/RSS.pm =================================================================== --- branches/v0.11/lib/WebFetch/Input/RSS.pm 2009-08-22 05:44:49 UTC (rev 32) +++ branches/v0.11/lib/WebFetch/Input/RSS.pm 2009-08-25 22:17:25 UTC (rev 33) @@ -27,8 +27,8 @@ # no user-servicable parts beyond this point -# register with WebFetch to provide "fetch" capability -__PACKAGE__->module_register( "fetch" ); +# register capabilities with WebFetch +__PACKAGE__->module_register( "input:rss" ); # called from WebFetch main routine sub fetch Modified: branches/v0.11/lib/WebFetch/Input/SiteNews.pm =================================================================== --- branches/v0.11/lib/WebFetch/Input/SiteNews.pm 2009-08-22 05:44:49 UTC (rev 32) +++ branches/v0.11/lib/WebFetch/Input/SiteNews.pm 2009-08-25 22:17:25 UTC (rev 33) @@ -14,23 +14,21 @@ use Date::Calc qw(Today Delta_Days Month_to_Text); # set defaults -our ( @input, $short_path, $long_path, $cat_priorities, $now, $nowstamp ); -our $short_path = undef; -our $long_path = undef; +our ( $cat_priorities, $now, $nowstamp ); our @Options = ( - "input=s@" => \@input, - "short=s" => \$short_path, - "long=s" => \$long_path); -our $Usage = "--input news-file --short short-output-file --long long-output-file"; + "short=s", + "long=s", +); +our $Usage = "--short short-output-file --long long-output-file"; # configuration parameters our $num_links = 5; # no user-servicable parts beyond this point -# register with WebFetch to provide "fetch" capability -__PACKAGE__->module_register( "fetch", "input:sitenews" ); +# register capabilities with WebFetch +__PACKAGE__->module_register( "cmdline", "input:sitenews" ); # constants for state names sub initial_state { 0; } @@ -72,13 +70,15 @@ $nowstamp = sprintf "%04d%02d%02d", @$now; # parse data file - my $input; - foreach $input ( @input ) { - $self->parse_input( $input ); + my $source; + if (( exists $self->{sources}) and ( ref $self->{sources} eq "ARRAY" )) { + foreach $source ( @{$self->{sources}}) { + $self->parse_input( $source ); + } } # set parameters for the short news format - if ( defined $short_path ) { + if ( defined $self->{short_path} ) { # create the HTML actions list $self->{actions}{html} = []; @@ -123,18 +123,18 @@ }; # put parameters for fmt_handler_html() on the html list - push @{$self->{actions}{html}}, [ $short_path, $params ]; + push @{$self->{actions}{html}}, [ $self->{short_path}, $params ]; } # set parameters for the long news format - if ( defined $long_path ) { + if ( defined $self->{long_path} ) { # create the SiteNews-specific action list # It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long() # which is defined in this file $self->{actions}{sitenews_long} = []; # put parameters for fmt_handler_sitenews_long() on the list - push @{$self->{actions}{sitenews_long}}, [ $long_path ]; + push @{$self->{actions}{sitenews_long}}, [ $self->{long_path} ]; } } @@ -300,7 +300,7 @@ push @long_text, "</dl>"; # store it for later save to disk - $self->html_savable( $long_path, join("\n",@long_text)."\n" ); + $self->html_savable( $self->{long_path}, join("\n",@long_text)."\n" ); } #--------------------------------------------------------------------------- @@ -342,8 +342,9 @@ if (( defined $entry->{category}) and ( defined $cat_priorities->{$entry->{category}})) { - return $cat_priorities->{$entry->{category}} + $age * 0.025 - + $bonus; + my $cat_pri = ( exists $cat_priorities->{$entry->{category}}) + ? $cat_priorities->{$entry->{category}} : 0; + return $cat_pri + $age * 0.025 + $bonus; } else { return $cat_priorities->{"default"} + $age * 0.025 + $bonus; @@ -369,16 +370,16 @@ From the command line: C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory - --input news-file --short short-form-output-file + --source news-file --short short-form-output-file --long long-form-output-file> =head1 DESCRIPTION This module gets the current headlines from a site-local file. -The I<--input> parameter specifies a file name which contains news to be +The I<--source> parameter specifies a file name which contains news to be posted. See L<"FILE FORMAT"> below for details on contents to put in the -file. I<--input> may be specified more than once, allowing a single news +file. I<--source> may be specified more than once, allowing a single news output to come from more than one input. For example, one file could be manually maintained in CVS or RCS and another could be entered from a web form. Modified: branches/v0.11/lib/WebFetch/Output/Dump.pm =================================================================== --- branches/v0.11/lib/WebFetch/Output/Dump.pm 2009-08-22 05:44:49 UTC (rev 32) +++ branches/v0.11/lib/WebFetch/Output/Dump.pm 2009-08-25 22:17:25 UTC (rev 33) @@ -32,8 +32,8 @@ # no user-servicable parts beyond this point -# register with WebFetch to provide "fetch" capability -__PACKAGE__->module_register( "save", "output:dump" ); +# register capabilities with WebFetch +__PACKAGE__->module_register( "output:dump" ); # Perl structure dump format handler sub fmt_handler_dump @@ -65,7 +65,7 @@ =head1 DESCRIPTION -This module gets the current headlines from a site-local file. +This module gets the current news headlines from a site-local file. TODO: add description Modified: branches/v0.11/lib/WebFetch.pm =================================================================== --- branches/v0.11/lib/WebFetch.pm 2009-08-22 05:44:49 UTC (rev 32) +++ branches/v0.11/lib/WebFetch.pm 2009-08-25 22:17:25 UTC (rev 33) @@ -118,28 +118,72 @@ # define exceptions/errors use Exception::Class ( 'WebFetch::Exception', + 'WebFetch::TracedException' => { + isa => 'WebFetch::Exception', + }, + 'WebFetch::Exception::GetoptError' => { + isa => 'WebFetch::Exception', + alias => 'throw_getopt_error', + description => "software error during command line processing", + }, + + 'WebFetch::Exception::Usage' => { + isa => 'WebFetch::Exception', + alias => 'throw_cli_usage', + description => "command line processing failed", + }, + 'WebFetch::Exception::Save' => { isa => 'WebFetch::Exception', + alias => 'throw_save_error', description => "an error occurred while saving the data", - trace => 0, }, + 'WebFetch::Exception::NoSave' => { + isa => 'WebFetch::Exception', + alias => 'throw_no_save', + description => "unable to save data because of no data or nowhere to save it", + }, + + 'WebFetch::Exception::NoInputHandler' => { + isa => 'WebFetch::Exception', + alias => 'throw_no_input_handler', + description => "no input handler was found", + }, + 'WebFetch::Exception::MustOverride' => { - isa => 'WebFetch::Exception', + isa => 'WebFetch::TracedException', description => "A WebFetch function was called which is " ."supposed to be overridden by a subclass", - trace => 1, }, + 'WebFetch::Exception::NetworkGet' => { isa => 'WebFetch::Exception', description => "Failed to access RSS feed", - trace => 0, }, + 'WebFetch::Exception::ModLoadFailure' => { + isa => 'WebFetch::Exception', + alias => 'throw_mod_load_failure', + description => "failed to load a WebFetch Perl module", + }, + + 'WebFetch::Exception::ModRunFailure' => { + isa => 'WebFetch::Exception', + alias => 'throw_mod_run_failure', + description => "failed to run a WebFetch module", + }, + + 'WebFetch::Exception::ModNoRunModule' => { + isa => 'WebFetch::Exception', + alias => 'throw_no_run', + description => "no module was found to run the request", + }, + ); -our $VERSION = '0.11-pre3'; +our $VERSION = '0.12'; our %default_modules = ( "input" => { "rss" => "WebFetch::Input::RSS", @@ -150,6 +194,7 @@ }, "output" => { "rss" => "WebFetch::Output:RSS", + "atom" => "WebFetch::Output:Atom", "tt" => "WebFetch::Output:TT", "perlstruct" => "WebFetch::Output::PerlStruct", "dump" => "WebFetch::Output::Dump", @@ -173,10 +218,11 @@ For the $module parameter, the Perl module should provide its own name, usually via the __PACKAGE__ string. -The @capabilities array is any number of strings as needed to list the capabilities which -the module performs for the WebFetch API. -The currently-recognized capabilities are "fetch" and "save". -However, the function will save all the capability names that the module provides. +The @capabilities array is any number of strings as needed to list the +capabilities which the module performs for the WebFetch API. +The currently-recognized capabilities are "cmdline", "input" and "output". +However, the function will save all the capability names that the module +provides. =cut @@ -210,13 +256,118 @@ } } +# satisfy POD coverage test - but don't put this function in the user manual +=pod +=cut +# module selection - choose WebFetch module based on selected file format +# for WebFetch internal use only +sub module_select +{ + my $capability = shift; + my $is_optional = shift; + + $debug and print STDERR "debug: " + ."module_select($capability,$is_optional)\n"; + # parse the capability string + my ( $group, $topic ); + if ( $capability =~ /([^:]*):(.*)/ ) { + $group = $1; + $topic = $2 + } else { + $topic = $capability; + } + + # check for modules to handle the specified source_format + my ( @handlers, %handlers, $handler ); + + # consider whether a group is in use (single or double-level scan) + if ( $group ) { + # double-level scan + + # if the group exists, search in it + if (( exists $modules{$group}{$topic} ) + and ( ref $modules{$group}{$topic} eq "ARRAY" )) + { + # search group for topic + foreach $handler (@{$modules{$group}{$topic}}) + { + if ( !exists $handlers{$handler}) { + push @handlers, $handler; + $handlers{$handler} = 1; + } + } + + # otherwise check the defaults + } elsif ( exists $default_modules{$group}{$topic} ) { + # check default handlers + $handler = $default_modules{$group}{$topic}; + if ( !exists $handlers{$handler}) { + push @handlers, $handler; + $handlers{$handler} = 1; + } + } + } else { + # single-level scan + + # if the topic exists, the search is a success + if (( exists $modules{$topic}) + and ( ref $modules{$topic} eq "ARRAY" )) + { + @handlers = @{$modules{$topic}}; + } + } + + # check if any handlers were found for this input format + if ( ! @handlers and ! $is_optional ) { + throw_no_input_handler( "handler not found for $capability" ); + } + + $debug and print STDERR "debug: module_select: " + .join( " ", @handlers )."\n"; + return @handlers; +} + +# satisfy POD coverage test - but don't put this function in the user manual +=pod +=cut + +# if no input or output format was specified, but only 1 is registered, pick it +# $group parameter should be config group to search, i.e. "input" or "output" +# returns the format string which will be provided +sub singular_handler +{ + my $group = shift; + + $debug and print STDERR "debug: singular_handler($group)\n"; + my $count = 0; + my ( $entry, $last ); + foreach $entry ( keys %{$modules{$group}} ) { + if ( ref $modules{$group}{$entry} eq "ARRAY" ) { + my $entry_count = scalar @{$modules{$group}{$entry}}; + $count += $entry_count; + if ( $count > 1 ) { + return undef; + } + if ( $entry_count == 1 ) { + $last = $entry; + } + } + } + + # if there's only one registered, that's the one to use + $debug and print STDERR "debug: singular_handler: " + ."count=$count last=$last\n"; + return $count == 1 ? $last : undef; +} + + =item fetch_main This function is exported into the main package. -For all modules which registered with the "fetch" capability at the time -this is called, it will call the run() function on behalf of each of the -packages. +For all modules which registered with an "input" capability for the requested +file format at the time this is called, it will call the run() function on +behalf of each of the packages. =cut @@ -224,20 +375,159 @@ # This eliminates the need for the sub-packages to export their own # fetch_main(), which users found conflicted with each other when # loading more than one WebFetch-derived module. +=head2 eval_wrapper ( $code, $throw_func, [ name => value, ...] ) + +=cut + +# fetch_main - eval wrapper for fetch_main2 to catch and display errors sub main::fetch_main { - my ( $pkgname ); + # run fetch_main2 in an eval so we can catch exceptions + my $result = eval { &WebFetch::fetch_main2; }; - # loop through the packages which registered with fetch capability - print STDERR "WebFetch: fetch_main\n"; - foreach $pkgname ( @{$modules{fetch}}) { - print STDERR "WebFetch: running for $pkgname\n"; - eval "\&WebFetch::run(\$pkgname)"; + # process any error/exception that we may have gotten + if ( $@ ) { + my $ex = $@; + + # determine if there's an error message available to display + my $pkg = __PACKAGE__; + if ( ref $ex ) { + if ( my $ex_cap = Exception::Class->caught( + "WebFetch::Exception")) + { + if ( $ex_cap->isa( "WebFetch::TracedException" )) { + warn $ex_cap->trace->as_string, "\n"; + } + + die "$pkg: ".$ex_cap->error."\n"; + } + if ( $ex->can("stringify")) { + # Error.pm, possibly others + die "$pkg: ".$ex->stringify."\n"; + } elsif ( $ex->can("as_string")) { + # generic - should work for many classes + die "$pkg: ".$ex->as_string."\n"; + } else { + die "$pkg: unknown exception of type " + .(ref $ex)."\n"; + } + } else { + die "pkg: $@\n"; + } + } + + # success + exit 0; +} + + +sub fetch_main2 +{ + # search for modules which have registered "cmdline" capability + # collect their command line options + my ( $cli_mod, @mod_options, @mod_usage ); + if (( exists $modules{cmdline} ) + and ( ref $modules{cmdline} eq "ARRAY" )) + { + foreach $cli_mod ( @{$modules{cmdline}}) { + if ( defined @cli_mod::Options ) { + push @mod_options, @cli_mod::Options; + } + if ( defined @cli_mod::Usage ) { + push @mod_options, @cli_mod::Usage; + } + } + } + + # process command line + my ( $result, %options ); + $result = eval { GetOptions ( \%options, + "dir:s", + "group:s", + "mode:s", + "source=s", + "source_format:s", + "dest=s", + "dest_format:s", + "quiet", + "debug", + @mod_options ) }; + if ( $@ ) { + throw_getopt_error ( "command line processing failed: $@" ); + } elsif ( ! $result ) { + throw_cli_usage ( "usage: $0 --dir dirpath " + ."[--group group] [--mode mode] " + ."[--source file] [--source_format fmt-string] " + ."[--dest file] [--dest_format fmt-string] " + ."[--quiet] ".join( " ", @mod_usage )); + } + + # set debugging mode + if (( exists $options{debug}) and $options{debug}) { + $debug = 1; + } + $debug and print STDERR "debug: fetch_main\n"; + + + # if either source/input or dest/output formats were not provided, + # check if only one handler is defined - if so that's the one to use + if ( !exists $options{source_format}) { + if ( my $fmt = singular_handler( "input" )) { + $options{source_format} = $fmt; + } + } + if ( !exists $options{dest_format}) { + if ( my $fmt = singular_handler( "output" )) { + $options{dest_format} = $fmt; + } + } + + # check for modules to handle the specified source_format + my ( @handlers, %handlers ); + if (( exists $modules{input}{ $options{source_format}} ) + and ( ref $modules{input}{ $options{source_format}} + eq "ARRAY" )) + { + my $handler; + foreach $handler (@{$modules{input}{$options{source_format}}}) + { + if ( !exists $handlers{$handler}) { + push @handlers, $handler; + $handlers{$handler} = 1; + } + } + } + if ( exists $default_modules{ $options{source_format}} ) { + my $handler = $default_modules{ $options{source_format}}; + if ( !exists $handlers{$handler}) { + push @handlers, $handler; + $handlers{$handler} = 1; + } + } + + # check if any handlers were found for this input format + if ( ! @handlers ) { + throw_no_input_handler( "input handler not found for " + .$options{source_format}); + } + + # run the available handlers until one succeeds or none are left + my $pkgname; + my $run_count = 0; + foreach $pkgname ( @handlers ) { + $debug and print STDERR "debug: running for $pkgname\n"; + eval { &WebFetch::run( $pkgname, \%options )}; if ( $@ ) { print STDERR "WebFetch: run eval error: $@\n"; + } else { + $run_count++; + last; } } - + if ( $run_count == 0 ) { + throw_no_run( "no handlers were able or available to process " + ." source format" ); + } } =item Do not use the new() function directly from WebFetch. @@ -286,6 +576,24 @@ } } +=item WebFetch::mod_load ( $class ) + +This specifies a WebFetch module (Perl class) which needs to be loaded. +In case of an error, it throws an exception. + +=cut + +sub mod_load +{ + my $pkg = shift; + + # make sure we have the run package loaded + eval "require $pkg"; + if ( $@ ) { + throw_mod_load_failure( "failed to load $pkg: $@" ); + } +} + =item WebFetch::run This function can be called by the C<main::fetch_main> function @@ -314,8 +622,8 @@ (optional) save a copy of the fetched info in the file named by this parameter. -The contents of the file are determined by the C<--save_format> parameter. -If C<--save_format> isn't defined but only one module has registered a +The contents of the file are determined by the C<--dest_format> parameter. +If C<--dest_format> isn't defined but only one module has registered a file format for saving, then that will be used by default. =item --quiet @@ -346,35 +654,42 @@ sub run { my $run_pkg = shift; - my ( $obj, $dir, $group, $mode, - $dest, $save_format, - $quiet, $source, $source_format ); + my $options_ref = shift; + my $obj; - my $result = GetOptions ( - "dir=s" => \$dir, - "group:s" => \$group, - "mode:s" => \$mode, - "dest:s" => \$dest, - "save_format:s" => \$save_format, - "source:s" => \$source, - "quiet" => \$quiet, - "debug" => \$debug, - ( eval "defined \@".$run_pkg."::Options" ) - ? eval "\@".$run_pkg."::Options" - : ()); - if ( ! $result ) { - print STDERR "usage: $0 --dir dirpath " - ."[--group group] [--mode mode] " - ."[--save file] [--save_format fmt-string] " - ."[--quiet]\n"; - if ( eval "defined \$".$run_pkg."::Usage" ) { - print STDERR " " - .( eval "\$".$run_pkg."::Usage" )."\n"; - } - exit 1; - } - $debug and print STDERR "WebFetch: entered run for $run_pkg\n"; + #my ( $obj, $dir, $group, $mode, + # $dest, $dest_format, + # $quiet, $source, $source_format ); + #my $result = GetOptions ( + # "dir=s" => \$dir, + # "group:s" => \$group, + # "mode:s" => \$mode, + # "dest:s" => \$dest, + # "dest_format:s" => \$dest_format, + # "source:s" => \$source, + # "source_format:s" => \$source_format, + # "quiet" => \$quiet, + # "debug" => \$debug, + # ( eval "defined \@".$run_pkg."::Options" ) + # ? eval "\@".$run_pkg."::Options" + # : ()); + #if ( ! $result ) { + # print STDERR "usage: $0 --dir dirpath " + # ."[--group group] [--mode mode] " + # ."[--source file] [--source_format fmt-string] " + # ."[--dest file] [--dest_format fmt-string] " + # ."[--quiet]\n"; + # if ( eval "defined \$".$run_pkg."::Usage" ) { + # print STDERR " " + # .( eval "\$".$run_pkg."::Usage" )."\n"; + # } + # exit 1; + #} + $debug and print STDERR "debug: entered run for $run_pkg\n"; + # make sure we have the run package loaded + mod_load $run_pkg; + # Note: in order to add WebFetch-embedding capability, the fetch # routine saves its raw data without any HTML/XML/etc formatting # in @{$obj->{data}} and data-to-savable conversion routines in @@ -386,45 +701,40 @@ # create the new object # this also calls the $obj->fetch() routine for the module which # has inherited from WebFetch to do this - $obj = eval 'new '.$run_pkg.' ( - "dir" => $dir, - (defined $group) ? ( "group" => $group ) : (), - (defined $mode) ? ( "mode" => $mode ) : (), - (defined $debug) ? ( "debug" => $debug ) : (), - (defined $dest) ? ( "dest" => $dest ) : (), - (defined $save_format) ? ( "save_format" => $save_format ) : (), - (defined $source) ? ( "source" => $source ) : (), - (defined $quiet) ? ( "quiet" => $quiet ) : (), - )'; + $debug and print STDERR "debug: run before new\n"; + $obj = eval $run_pkg."->new( \%\$options_ref )"; if ( $@ ) { - print STDERR "WebFetch: error: $@\n"; - exit 1; + throw_mod_run_failure( "module run failure: ".$@ ); } - # if the object had the data for the WebFetch-embedding API, + # if the object had data for the WebFetch-embedding API, # then data processing is external to the fetch routine # (This externalizes the data for other software to capture it.) + $debug and print STDERR "run before output\n"; + my $dest_format = $options_ref->{dest_format}; if (( defined $obj->{data}) and ( defined $obj->{actions})) { - if ( defined $obj->{dest}) { ( defined $obj->{actions}) or $obj->{actions} = {}; - ( defined $obj->{actions}{$save_format}) - or $obj->{actions}{$save_format} = []; - push @{$obj->{actions}{$save_format}}, [ $obj->{dest} ]; + ( defined $obj->{actions}{$dest_format}) + or $obj->{actions}{$dest_format} = []; + push @{$obj->{actions}{$dest_format}}, [ $obj->{dest} ]; } # perform requested actions on the data $obj->do_actions(); + } else { + throw_no_save( "save failed: no data or nowhere to save it" ); } - $result = $obj->save(); + $debug and print STDERR "run before save\n"; + my $result = $obj->save(); if ( ! $result ) { my $savable; foreach $savable ( @{$obj->{savable}}) { (ref $savable eq "HASH") or next; if ( defined $savable->{error}) { - WebFetch::Exception::Save->throw( - "error saving in ".$obj->{dir} + throw_save_error( "error saving in " + .$obj->{dir} ."file: ".$savable->{file} ."error: " .$savable->{error} ); } @@ -664,7 +974,8 @@ foreach $action_spec ( keys %{$self->{actions}} ) { my $handler_ref; - # check if there's a handler function for this action + # check for modules to handle the specified dest_format + my ( @handlers, %handlers ); my $action_handler = "fmt_handler_".$action_spec; if ( exists $modules{output}{$action_spec}) { my $class; @@ -816,8 +1127,7 @@ sub fetch { WebFetch::Exception::MustOverride->throw( - "WebFetch: fetch() " - ."function must be overridden by a derived module\n" ); + "fetch() function must be overridden by a derived module\n" ); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |