[PerlWikiBot] SF.net SVN: perlwikibot:[86] trunk/no-interwiki/prepare_noiw_list.pl
Status: Pre-Alpha
Brought to you by:
rotemliss
From: <am...@us...> - 2010-03-25 15:02:34
|
Revision: 86
          http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=86&view=rev
Author:   amire80
Date:     2010-03-25 15:02:28 +0000 (Thu, 25 Mar 2010)

Log Message:
-----------
Update of empty pages handling.

Modified Paths:
--------------
    trunk/no-interwiki/prepare_noiw_list.pl

Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl	2010-03-09 12:44:41 UTC (rev 85)
+++ trunk/no-interwiki/prepare_noiw_list.pl	2010-03-25 15:02:28 UTC (rev 86)
@@ -280,9 +280,7 @@
 my $SECTION_LINK_RE = qr{(?<!&)\#}xms;
 my $LOWERCASE_LINK_RE = qr{\A[[:lower:]]}xms;
 
-## no critic (RegularExpressions::ProhibitEscapedMetacharacters)
 my $TRUE_TEMPLATE_RE = qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms;
-## use critic (RegularExpressions::ProhibitEscapedMetacharacters)
 
 # get_string() cannot be used in re
 my $string_exclude_lowercase = get_string('exclude_lowercase');
@@ -420,23 +418,9 @@
 exit;
 
 sub load_dump {
-    Readonly my $WORKING_PMWD_VER => 0.91;
-    Readonly my $FUTURE_PMWD_VER => 0.94;
-
     my $dump;
 
-    if ($Parse::MediaWikiDump::VERSION == $WORKING_PMWD_VER) {
-        $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
-    }
-    else {
-        if ($Parse::MediaWikiDump::VERSION < $FUTURE_PMWD_VER) {
-            carp( 'You are running Parse::MediaWikiDump version '
-                  . $Parse::MediaWikiDump::VERSION
-                  . ".\n Redirect handling may be broken\n");
-        }
-        my $pmwd = Parse::MediaWikiDump->new();
-        $dump = $pmwd->revisions($dump_fn);
-    }
+    $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
 
     return $dump;
 }
@@ -471,6 +455,12 @@
 
     my $page_text_ref = $page_ref->text();
 
+    if (${$page_text_ref} !~ /\S/xmsi
+        and not is_in_namespace($page_ref, 'User', 'User talk'))
+    {
+        special_cases_file('empty_page', $page_ref);
+    }
+
     if ( $page_counter < $option{start_from}
         or not defined ${$page_text_ref}
         # must be tested before redirect
@@ -1266,6 +1256,12 @@
 sub print_multi_links_by_foreign {
     LANG_CODE:
     foreach my $lang_code (sort keys %found_links) {
+        say "processing $lang_code";
+        my $lang_reftype = ref $found_links{$lang_code};
+        if ($lang_reftype ne 'HASH') {
+            carp('$lang_code is $lang_reftype, not hashref!');
+            next LANG_CODE;
+        }
         my $filename = "$MULTI_DIR/$lang_code.$WIKITEXT_EXT";
         my @foreign_articles = sort keys %{ $found_links{$lang_code} };
         say format_link_table($lang_code, scalar @foreign_articles);
@@ -1274,6 +1270,14 @@
         foreach my $foreign_article (@foreign_articles) {
             my @local_articles =
                 keys %{ $found_links{$lang_code}->{$foreign_article} };
+
+            my $article_reftype =
+                ref $found_links{$lang_code}->{$foreign_article};
+            if ($article_reftype ne 'HASH') {
+                carp("$foreign_article is $article_reftype, not hashref!");
+                next FOREIGN_ARTICLE;
+            }
+
             if (scalar @local_articles > 1) {
                 my $links = join q{ | }, sort map { make_link($_) } keys
                     %{ $found_links{$lang_code}->{$foreign_article} };

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|