[PerlWikiBot] SF.net SVN: perlwikibot:[82] trunk/no-interwiki/prepare_noiw_list.pl
Status: Pre-Alpha
Brought to you by:
rotemliss
|
From: <am...@us...> - 2009-09-27 12:41:46
|
Revision: 82
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=82&view=rev
Author: amire80
Date: 2009-09-27 12:41:34 +0000 (Sun, 27 Sep 2009)
Log Message:
-----------
Refactoring, improving performance. Gmar khatima tova.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2009-09-27 00:44:08 UTC (rev 81)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2009-09-27 12:41:34 UTC (rev 82)
@@ -30,7 +30,7 @@
# pragmata
use strict;
use warnings;
-use integer;
+use integer; # Improves performance
use open ':utf8';
use utf8;
use charnames ':full';
@@ -54,7 +54,7 @@
use Regexp::Common;
use Lingua::Translit;
use Log::Log4perl qw(:easy);
-use Parse::MediaWikiDump 0.92; # Earlier versions have a different API
+use Parse::MediaWikiDump 0.91; # Earlier versions have a different API
our $VERSION = '0.2.1';
#<<< no perltidy
@@ -366,11 +366,10 @@
}
}
-my $pmwd = Parse::MediaWikiDump->new();
-my $dump = $pmwd->revisions($dump_fn);
+my $the_dump = load_dump();
my $namespaces_alt = join $ALT_SEP,
- grep { length > 0 } @{ $dump->namespaces_names() };
+ grep { length > 0 } @{ $the_dump->namespaces_names() };
my $PURE_TITLE_RE = qr{
\A
(?:
@@ -383,7 +382,7 @@
# This is the only currently known value
# but there could be more in the future
-if ($dump->case() ne 'first-letter') {
+if ($the_dump->case() ne 'first-letter') {
croak(q{unable to handle any case setting besides 'first-letter'});
}
@@ -420,12 +419,35 @@
exit;
+# Open the dump file named by $dump_fn using the Parse::MediaWikiDump API
+# that matches the installed module version, and return the dump object.
+#
+# Exactly version 0.91: Parse::MediaWikiDump::Pages->new($dump_fn).
+# Any other version:    Parse::MediaWikiDump->new()->revisions($dump_fn),
+#                       with a warning when the version is below 0.94.
+sub load_dump {
+    # This file runs under "use integer", which makes == and < truncate
+    # their operands to integers. Fractional version numbers such as 0.91
+    # and 0.94 would then all compare as 0, so every 0.x release would be
+    # treated as the "working" version. Compare as real numbers here.
+    no integer;
+
+    Readonly my $WORKING_PMWD_VER => 0.91;
+    Readonly my $FUTURE_PMWD_VER  => 0.94;
+
+    my $dump;
+
+    if ($Parse::MediaWikiDump::VERSION == $WORKING_PMWD_VER) {
+        $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
+    }
+    else {
+        if ($Parse::MediaWikiDump::VERSION < $FUTURE_PMWD_VER) {
+            carp( 'You are running Parse::MediaWikiDump version '
+                . $Parse::MediaWikiDump::VERSION
+                . ".\n Redirect handling may be broken\n");
+        }
+        my $pmwd = Parse::MediaWikiDump->new();
+        $dump = $pmwd->revisions($dump_fn);
+    }
+
+    return $dump;
+}
+
# Return the namespace reported by the given page object; when that value is
# false, return whatever get_string() yields for 'article space' instead.
sub namespace {
    my $page = shift;

    my $name = $page->namespace();
    return $name if $name;

    return get_string('article space');
}
sub next_page {
+ my ($dump) = @_;
my $page_ref = eval { $dump->next(); };
if ($EVAL_ERROR) {
confess("Failed reading a page: $EVAL_ERROR");
@@ -435,7 +457,7 @@
sub find_iwless {
PAGE:
- while (my $page_ref = next_page()) {
+ while (my $page_ref = next_page($the_dump)) {
$page_counter++;
if ($page_counter % $option{page_freq} == 0) {
say $page_counter;
@@ -1390,12 +1412,14 @@
foreach my $namespace (keys %{ $statistics{'has no interwiki link'} }) {
my $iwless_in_namespace =
$statistics{'has no interwiki link'}->{$namespace};
+
no integer;
## no critic (ProhibitMagicNumbers)
my $percentage = sprintf '%.2f',
100 * $iwless_in_namespace / $namespace_count{$namespace};
## use critic (ValuesAndExpressions::ProhibitMagicNumbers)
use integer;
+
INFO("$namespace: $iwless_in_namespace, $percentage%");
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|