[PerlWikiBot] SF.net SVN: perlwikibot:[82] trunk/no-interwiki/prepare_noiw_list.pl
Status: Pre-Alpha
Brought to you by:
rotemliss
From: <am...@us...> - 2009-09-27 12:41:46
|
Revision: 82
          http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=82&view=rev
Author:   amire80
Date:     2009-09-27 12:41:34 +0000 (Sun, 27 Sep 2009)

Log Message:
-----------
Refactoring, improving performance. Gmar khatima tova.

Modified Paths:
--------------
    trunk/no-interwiki/prepare_noiw_list.pl

Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl	2009-09-27 00:44:08 UTC (rev 81)
+++ trunk/no-interwiki/prepare_noiw_list.pl	2009-09-27 12:41:34 UTC (rev 82)
@@ -30,7 +30,7 @@
 # pragmata
 use strict;
 use warnings;
-use integer;
+use integer; # Improves performance
 use open ':utf8';
 use utf8;
 use charnames ':full';
@@ -54,7 +54,7 @@
 use Regexp::Common;
 use Lingua::Translit;
 use Log::Log4perl qw(:easy);
-use Parse::MediaWikiDump 0.92; # Earlier versions have a different API
+use Parse::MediaWikiDump 0.91; # Earlier versions have a different API
 
 our $VERSION = '0.2.1';
 #<<< no perltidy
@@ -366,11 +366,10 @@
     }
 }
 
-my $pmwd = Parse::MediaWikiDump->new();
-my $dump = $pmwd->revisions($dump_fn);
+my $the_dump = load_dump();
 
 my $namespaces_alt = join $ALT_SEP,
-    grep { length > 0 } @{ $dump->namespaces_names() };
+    grep { length > 0 } @{ $the_dump->namespaces_names() };
 my $PURE_TITLE_RE = qr{
     \A
     (?:
@@ -383,7 +382,7 @@
 
 # This is the only currently known value
 # but there could be more in the future
-if ($dump->case() ne 'first-letter') {
+if ($the_dump->case() ne 'first-letter') {
     croak(q{unable to handle any case setting besides 'first-letter'});
 }
 
@@ -420,12 +419,35 @@
 
 exit;
 
+sub load_dump {
+    Readonly my $WORKING_PMWD_VER => 0.91;
+    Readonly my $FUTURE_PMWD_VER  => 0.94;
+
+    my $dump;
+
+    if ($Parse::MediaWikiDump::VERSION == $WORKING_PMWD_VER) {
+        $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
+    }
+    else {
+        if ($Parse::MediaWikiDump::VERSION < $FUTURE_PMWD_VER) {
+            carp( 'You are running Parse::MediaWikiDump version '
+                  . $Parse::MediaWikiDump::VERSION
+                  . ".\n Redirect handling may be broken\n");
+        }
+        my $pmwd = Parse::MediaWikiDump->new();
+        $dump = $pmwd->revisions($dump_fn);
+    }
+
+    return $dump;
+}
+
 sub namespace {
     my ($page) = @_;
     return $page->namespace() || get_string('article space');
 }
 
 sub next_page {
+    my ($dump) = @_;
     my $page_ref = eval { $dump->next(); };
     if ($EVAL_ERROR) {
         confess("Failed reading a page: $EVAL_ERROR");
@@ -435,7 +457,7 @@
 
 sub find_iwless {
     PAGE:
-    while (my $page_ref = next_page()) {
+    while (my $page_ref = next_page($the_dump)) {
         $page_counter++;
         if ($page_counter % $option{page_freq} == 0) {
             say $page_counter;
@@ -1390,12 +1412,14 @@
     foreach my $namespace (keys %{ $statistics{'has no interwiki link'} }) {
         my $iwless_in_namespace =
             $statistics{'has no interwiki link'}->{$namespace};
+        no integer;
         ## no critic (ProhibitMagicNumbers)
         my $percentage = sprintf '%.2f',
             100 * $iwless_in_namespace
             / $namespace_count{$namespace};
         ## use critic (ValuesAndExpressions::ProhibitMagicNumbers)
         use integer;
+
         INFO("$namespace: $iwless_in_namespace, $percentage%");
     }
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.