[PerlWikiBot] SF.net SVN: perlwikibot:[86] trunk/no-interwiki/prepare_noiw_list.pl
Status: Pre-Alpha
Brought to you by:
rotemliss
|
From: <am...@us...> - 2010-03-25 15:02:34
|
Revision: 86
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=86&view=rev
Author: amire80
Date: 2010-03-25 15:02:28 +0000 (Thu, 25 Mar 2010)
Log Message:
-----------
Update of empty pages handling.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2010-03-09 12:44:41 UTC (rev 85)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2010-03-25 15:02:28 UTC (rev 86)
@@ -280,9 +280,7 @@
my $SECTION_LINK_RE = qr{(?<!&)\#}xms;
my $LOWERCASE_LINK_RE = qr{\A[[:lower:]]}xms;
-## no critic (RegularExpressions::ProhibitEscapedMetacharacters)
my $TRUE_TEMPLATE_RE = qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms;
-## use critic (RegularExpressions::ProhibitEscapedMetacharacters)
# get_string() cannot be used in re
my $string_exclude_lowercase = get_string('exclude_lowercase');
@@ -420,23 +418,9 @@
exit;
sub load_dump {
- Readonly my $WORKING_PMWD_VER => 0.91;
- Readonly my $FUTURE_PMWD_VER => 0.94;
-
my $dump;
- if ($Parse::MediaWikiDump::VERSION == $WORKING_PMWD_VER) {
- $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
- }
- else {
- if ($Parse::MediaWikiDump::VERSION < $FUTURE_PMWD_VER) {
- carp( 'You are running Parse::MediaWikiDump version '
- . $Parse::MediaWikiDump::VERSION
- . ".\n Redirect handling may be broken\n");
- }
- my $pmwd = Parse::MediaWikiDump->new();
- $dump = $pmwd->revisions($dump_fn);
- }
+ $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
return $dump;
}
@@ -471,6 +455,12 @@
my $page_text_ref = $page_ref->text();
+ if (${$page_text_ref} !~ /\S/xmsi
+ and not is_in_namespace($page_ref, 'User', 'User talk'))
+ {
+ special_cases_file('empty_page', $page_ref);
+ }
+
if (
$page_counter < $option{start_from}
or not defined ${$page_text_ref} # must be tested before redirect
@@ -1266,6 +1256,12 @@
sub print_multi_links_by_foreign {
LANG_CODE:
foreach my $lang_code (sort keys %found_links) {
+ say "processing $lang_code";
+ my $lang_reftype = ref $found_links{$lang_code};
+ if ($lang_reftype ne 'HASH') {
+ carp('$lang_code is $lang_reftype, not hashref!');
+ next LANG_CODE;
+ }
my $filename = "$MULTI_DIR/$lang_code.$WIKITEXT_EXT";
my @foreign_articles = sort keys %{ $found_links{$lang_code} };
say format_link_table($lang_code, scalar @foreign_articles);
@@ -1274,6 +1270,14 @@
foreach my $foreign_article (@foreign_articles) {
my @local_articles =
keys %{ $found_links{$lang_code}->{$foreign_article} };
+
+ my $article_reftype =
+ ref $found_links{$lang_code}->{$foreign_article};
+ if ($article_reftype ne 'HASH') {
+ carp("$foreign_article is $article_reftype, not hashref!");
+ next FOREIGN_ARTICLE;
+ }
+
if (scalar @local_articles > 1) {
my $links = join q{ | }, sort map { make_link($_) }
keys %{ $found_links{$lang_code}->{$foreign_article} };
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|