[PerlWikiBot] SF.net SVN: perlwikibot:[73] trunk/no-interwiki/prepare_noiw_list.pl
From: <am...@us...> - 2009-09-08 19:20:25
Revision: 73
          http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=73&view=rev
Author:   amire80
Date:     2009-09-08 19:20:10 +0000 (Tue, 08 Sep 2009)

Log Message:
-----------
cleanup before upgrading to Parse::MediaWikiDump 0.92

Modified Paths:
--------------
    trunk/no-interwiki/prepare_noiw_list.pl

Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2008-11-21 23:07:37 UTC (rev 72)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2009-09-08 19:20:10 UTC (rev 73)
@@ -12,7 +12,7 @@ # This program is Free Software; you can redistribute it and/or modify it # under the same terms as Perl itself. -# Upgrade! This script actually uses new Perl 5.10 constructs, so you need it +# Upgrade! This program actually uses new Perl 5.10 constructs, so you need it use 5.010; # pragmata @@ -27,7 +27,6 @@ use English qw(-no_match_vars); use Carp qw(croak cluck); use Time::Local; -use List::Util qw(first); use Getopt::Long; use Data::Dumper; use File::Basename; @@ -36,14 +35,18 @@ # You must install these modules from CPAN # Versions before 0.51 have a bug with parsing the namespace of a page -use Parse::MediaWikiDump 0.51; +use Readonly; use Regexp::Common; use Lingua::Translit; use Log::Log4perl qw(:easy); -use Readonly; +use Parse::MediaWikiDump 0.51; # Earlier versions have a bug in namespaces +# XXX +use Devel::Leak; +use Devel::Size qw(size total_size); + #<<< no perltidy -my %SVN_PROPS = ( ## no critic ValuesAndExpressions::RequireInterpolationOfMetachars +my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars) Revision => '$Revision$', HeadURL => '$HeadURL$', Date => '$Date$', @@ -53,7 +56,6 @@ ? "0.1.9.$+{revision_num}" : croak(q(Something is wrong with SVN revision number)); -my %PATTERN; Readonly my $WIKITEXT_EXT => 'wiki.txt'; Readonly my $OUT_DIR => 'out'; Readonly my $UNSORTED_DIR => "$OUT_DIR/unsorted"; @@ -102,15 +104,15 @@ } # XXX Too coupled to Wikipedia, won't work for other projects. -$PATTERN{dump_fn} = qr{ +Readonly my $DUMP_FN_RE => qr{ \A # Begin string (?<wiki_lang>\w+) # Lang code wiki # Is supposed to be after the lang code }xms; -$PATTERN{field_sep} = qr{\t}xms; +Readonly my $FIELD_SEP_RE => qr{\t}xms; -$PATTERN{string_skip} = qr{ +Readonly my $STRING_SKIP_RE => qr{ \A # Begin string \s* # Zero or more spaces (?:\#.*)? # Comment lines @@ -122,9 +124,9 @@ or croak('Dump filename must be supplied as an argument.'); my %STRING; my $WIKI_LANG; -if ((basename $dump_fn) =~ $PATTERN{dump_fn}) { +if ((basename $dump_fn) =~ $DUMP_FN_RE) { $WIKI_LANG = $+{wiki_lang}; - %STRING = get_strings($WIKI_LANG); + get_strings($WIKI_LANG); } else { croak("$dump_fn is a weird dump file name."); @@ -135,8 +137,8 @@ my $TRANSLITERATOR = Lingua::Translit->new('ISO 9'); Readonly my %MW_SYNTAX => ( - 'start_tmpl' => '{{', - 'end_tmpl' => '}}', + 'start_template' => '{{', + 'end_template' => '}}', 'start_link' => '[[', 'end_link' => ']]', 'param_sep' => q{|}, @@ -146,7 +148,7 @@ ); # This monstrosity basically says: | and optional spaces -$PATTERN{param_sep} = qr{\s*\Q$MW_SYNTAX{param_sep}\E\s*}xms; +Readonly my $PARAM_SEP_RE => qr{\s*\Q$MW_SYNTAX{param_sep}\E\s*}xms; Readonly my @INCLUDE_NAMESPACES => ('article space', 'Category', 'Portal'); @@ -168,7 +170,7 @@ @REV_MONTH{@MONTHS} = (0 .. 
$LAST_MONTH); # XXX Internationalize -$PATTERN{heb_date} = qr{ +Readonly my $HEB_DATE_RE => qr{ \A # begin (?<hour>\d{1,2}) # hour : # : @@ -182,7 +184,7 @@ \z # end }xms; -$PATTERN{param} = qr{ +Readonly my $PARAM_RE => qr{ \A # Beginning of a string \s* # Zero or more space (?: # No capture @@ -194,22 +196,22 @@ }xms; # XXX It should use get_string() -$PATTERN{simple_no_iw_check} = qr{ +Readonly my $SIMPLE_NO_IW_CHECK_RE => qr{ \Q$STRING{no_iw}\E # The string may have spaces }xmsi; # A simplistic template just for testing. # Quite possibly it is not needed anymore. # Until i get a better regex for matching balancing {{}} ... -$PATTERN{template} = qr{ +Readonly my $TEMPLATE_RE => qr{ \A # beginning of string - \Q$MW_SYNTAX{start_tmpl}\E # {{ + \Q$MW_SYNTAX{start_template}\E # {{ .+ # some chars - \Q$MW_SYNTAX{end_tmpl}\E # }} + \Q$MW_SYNTAX{end_template}\E # }} \z # end of string }xms; -$PATTERN{wikitable} = qr{ +Readonly my $WIKITABLE_RE => qr{ \A \Q$MW_SYNTAX{start_wikitable}\E }xms; @@ -218,7 +220,7 @@ # Regular expression mostly copied from # Parse::MediaWikiDump::page::redirect # TODO: Try to look for the local language redirect keyword in the dump. -$PATTERN{local_redirect} = qr{ +Readonly my $LOCAL_REDIRECT_RE => qr{ \A # Beginning of string (page) \# # a # character $STRING{REDIRECT} # Redirect keyword in local language @@ -226,33 +228,44 @@ \s*:?\s*\[\[([^\]]*)\]\] # the link after the redirect }xmsi; -$PATTERN{ltr_char} = qr/\P{IsLeftToRight}/xms; -$PATTERN{true_template} = qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms; -$PATTERN{section_link} = qr{(?<!&)\#}xms; -$PATTERN{lowercase_link} = qr{\A[[:lower:]]}xms; +Readonly my $LTR_CHAR_RE => qr/\P{IsLeftToRight}/xms; +Readonly my $SECTION_LINK_RE => qr{(?<!&)\#}xms; +Readonly my $LOWERCASE_LINK_RE => qr{\A[[:lower:]]}xms; -# XXX get_string() cannot be used here -if ($STRING{exclude_lowercase}) { - $PATTERN{exclude_lowercase} = qr{\A[$STRING{exclude_lowercase}]}xms; -} -$PATTERN{numbered_file} = qr{ +## no critic (ProhibitEscapedMetacharacters) +Readonly my $TRUE_TEMPLATE_RE => qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms; +## use critic (ProhibitEscapedMetacharacters) + +# get_string() cannot be used in re +my $string_exclude_lowercase = get_string('exclude_lowercase'); +Readonly my $EXCLUDE_LOWERCASE_RE => qr{ + \A # Beginning of foreign article name + [$string_exclude_lowercase] # Character class of possibly lowercase chars +}xms; +Readonly my $NUMBERED_FILE_RE => qr{ (?: _ \d*)? \.$WIKITEXT_EXT }xms; -$PATTERN{invalid_filename_char} = qr{[\\\n/:*?"<>|]}xms; # " +Readonly my $INVALID_FILENAME_CHAR_RE => qr{[\\\n/:*?"<>|]}xms; # " -# TODO: Check whether it is Neapolitan with its '' -$PATTERN{character_code_in_link} = qr{ +my $two_digit_charnumber_re = qr{ (?: [%.] 
# There are both %C4%B0 and .AA.E0 [[:xdigit:]]{2} # 2 hex digits ) - | +}xms; + +my $html_charnumber_re = qr{ (?: &\#\d+; # stuff like Š ) }xms; +# TODO: Check whether it is Neapolitan with its '' +Readonly my $CHARACTER_CODE_IN_LINK_RE => qr{ + $two_digit_charnumber_re | $html_charnumber_re +}xms; + my %LANG_CODE; Readonly my $LANG_CODE_FN => "$WIKI_LANG.language_codes.txt"; open my $lang_code_file, '<', $LANG_CODE_FN @@ -260,7 +273,7 @@ while (my $line = <$lang_code_file>) { chomp $line; my ($code, $name) = split /\t/xms, $line; - $LANG_CODE{$code} = $name // $code; + $LANG_CODE{$code} = $name // $code; # / } close $lang_code_file @@ -268,7 +281,7 @@ Readonly my $ALT_LANGS => join $ALT_SEP, keys %LANG_CODE; -$PATTERN{interwiki_link} = qr{ +Readonly my $INTERWIKI_LINK_RE => qr{ \Q$MW_SYNTAX{start_link}\E (?<lang_code> $ALT_LANGS @@ -282,6 +295,7 @@ # Lojban allows lowercase articles Readonly my @LOWERCASE_LANGS => qw(jbo); + Readonly my $COOLING_DAYS => 120; Readonly my $COOLING_SECONDS => $COOLING_DAYS * 24 * 60 * 60; Readonly my $LATEST_COOLING => time - $COOLING_SECONDS; @@ -302,11 +316,12 @@ } } -my $dump = Parse::MediaWikiDump::Pages->new($dump_fn); +my $pmwd = Parse::MediaWikiDump->new(); +my $dump = $pmwd->revisions($dump_fn); my $namespaces_alt = join $ALT_SEP, grep { length > 0 } @{ $dump->namespaces_names() }; -$PATTERN{pure_title} = qr{ +Readonly my $PURE_TITLE_RE => qr{ \A (?: (?:$namespaces_alt) @@ -359,12 +374,27 @@ return $page->namespace() || get_string('article space'); } +sub next_page { + my $page_ref = eval { $dump->next(); }; + if ($EVAL_ERROR) { + confess("Failed reading a page: $EVAL_ERROR"); + } + return $page_ref; +} + sub find_iwless { + my $leak_handle; + my $leak_count; + PAGE: - while (my $page = $dump->page()) { + while (my $page_ref = next_page()) { $page_counter++; if ($page_counter % $option{page_freq} == 0) { say $page_counter; + # my $lead_count = Devel::Leak::NoteSV($leak_handle); + # say "leak count: $lead_count"; + # say 'Devel::Size size of buck dump: ', total_size($dump); + # say 'Devel::Size total_size of buck page_ref: ', total_size($page_ref); } last PAGE @@ -374,45 +404,45 @@ next PAGE if ($page_counter < $option{start_from}); - my $namespace = namespace($page); - my $page_title = $page->title(); + my $page_namespace = namespace($page_ref); + my $page_title = $page_ref->title(); # Skipping cases: next PAGE if ( - not is_in_namespace($page, @INCLUDE_NAMESPACES) - or is_redirect($page) + not is_in_namespace($page_ref, @INCLUDE_NAMESPACES) + or is_redirect($page_ref) # TODO: Be more precise here. # Portal pages which have a '/' in their name are probably # internal and do not need interwiki links. - or (is_in_namespace($page, 'Portal') and $page_title =~ m{/}xms) + or (is_in_namespace($page_ref, 'Portal') + and $page_title =~ m{/}xms) ); - $namespace_count{$namespace}++; + $namespace_count{$page_namespace}++; INFO("\n* processing $page_counter - ", $page_title); - my $page_text_ref = $page->text(); + my $page_text_ref = $page_ref->text(); # A simple sanity check: is the no_iw template anywhere around here? - my $has_tmpl_no_iw = - (${$page_text_ref} =~ $PATTERN{simple_no_iw_check}); + my $has_template_no_iw = ($page_text_ref =~ $SIMPLE_NO_IW_CHECK_RE); # Does the page have interwiki links? # BIG XXX Actually checks only for English - my $has_iw = has_interwiki($page); + my $has_iw = has_interwiki($page_ref); if ($has_iw) { INFO("has link to $has_iw"); - if ($has_tmpl_no_iw) { + if ($has_template_no_iw) { INFO('has template no_iw. 
trying to remove ...'); - remove_tmpl_no_iw($page_text_ref); + remove_template_no_iw($page_text_ref); $statistics{'has both valid interwiki and template'}++; - special_cases_file('outdated_template', $page); + special_cases_file('outdated_template', $page_ref); } } else { # does not have iw - process_iwless_page($page, $has_tmpl_no_iw, $has_iw); + process_iwless_page($page_ref, $has_template_no_iw, $has_iw); } } @@ -421,25 +451,24 @@ sub process_iwless_page { my ( - $page, # object ref - $has_tmpl_no_iw, # scalar bool - $has_iw # scalar bool + $page_ref, # object ref + $has_template_no_iw, # scalar bool + $has_iw # scalar bool ) = @_; INFO(q(does not have iw link.)); - $statistics{'has no interwiki link'}->{ namespace($page) }++; + $statistics{'has no interwiki link'}->{ namespace($page_ref) }++; # Now we need to search for no_iw templates # and parse their parameters - date and type my @found_templates = (); - my $page_text_ref = $page->text(); - my $page_title = $page->title(); + my $page_text_ref = $page_ref->text(); # Optimized - does not start searching, # if we already know that it is not there - if ($has_tmpl_no_iw) { + if ($has_template_no_iw) { find_templates($page_text_ref, \@found_templates, [ get_string('no_iw') ]); } @@ -455,7 +484,7 @@ if ($found_templates_count > 1) { WARN('many templates were found'); $statistics{'many templates'}++; - special_cases_file('many_templates', $page); + special_cases_file('many_templates', $page_ref); } else { INFO('good, found one template'); @@ -476,7 +505,7 @@ if (not defined $date_ref) { INFO("invalid date: '$date_str'"); $statistics{'invalid date'}++; - special_cases_file('invalid_date', $page); + special_cases_file('invalid_date', $page_ref); } elsif (cooling_date_passed($date_ref)) { INFO('cooling date passed, updating to today ...'); @@ -494,11 +523,11 @@ } - my @all_types = get_all_types($template->{params}->{type}, $page); + my @all_types = get_all_types($template->{params}->{type}, $page_ref); foreach my $type (@all_types) { - INFO("adding $page_title to the list as type $type"); - add_to_no_iw_list($page, $type); + INFO('adding ' . $page_ref->title() . " to the list as type $type"); + add_to_no_iw_list($page_ref, $type); $type_count{$type}++; } @@ -508,7 +537,7 @@ sub get_all_types { my ($type_param, $page) = @_; - $type_param //= q{}; + $type_param //= q{}; # / strip_whitespace($type_param); my @all_types = split $TYPE_SEP, $type_param; @@ -545,23 +574,23 @@ sub find_templates { my ( - $page_text_ref, # string ref + $text_ref, # string ref $found_templates_ref, # array ref $filter # string array ref ) = @_; # A reference to an array with one empty string. # Matching against an empty string will always succeed. - $filter //= [q{}]; + $filter //= [q{}]; # / # Get all highest-level matches - my @matches = (${$page_text_ref} =~ /$PATTERN{true_template}/xmsgo); + my @matches = (${$text_ref} =~ /$TRUE_TEMPLATE_RE/xmsgo); MATCH: foreach my $next_match (@matches) { - if ($next_match !~ $PATTERN{template}) { + if ($next_match !~ $TEMPLATE_RE) { INFO(q(i thought that it is a template, but it was:)); - if ($next_match =~ $PATTERN{wikitable}) { + if ($next_match =~ $WIKITABLE_RE) { INFO('a wikitable'); } else { @@ -574,7 +603,8 @@ foreach my $next_filter (@{$filter}) { # N.B. - case-insensitive. Wrong, but kinda useful. 
- if ($next_match =~ /\A\Q$MW_SYNTAX{'start_tmpl'}$next_filter/xmsi) + if ($next_match =~ + /\A\Q$MW_SYNTAX{'start_template'}$next_filter/xmsi) { # N.B.: parse_template calls find_templates() recursively @@ -592,7 +622,7 @@ my ( $template, # string ref $default_param_names, # string array ref - $subtmpl_filter, # string array ref + $subtemplate_filter, # string array ref ) = @_; # %parsed_template: @@ -602,16 +632,16 @@ # {subtemplates} - array ref my (%parsed_template, %parsed_params, @clauses); - $parsed_template{text} = strip_tmpl_curlies(${$template}); + $parsed_template{text} = strip_template_curlies(${$template}); # First string of the split is the template name, # the rest is the params ($parsed_template{name}, @clauses) = - (split $PATTERN{param_sep}, ${ $parsed_template{text} }); + (split $PARAM_SEP_RE, ${ $parsed_template{text} }); my $param_counter = 0; foreach my $clause (@clauses) { - if ($clause =~ $PATTERN{param}) { + if ($clause =~ $PARAM_RE) { #<<< no perltidy my ($name, $value) = @+{ qw(param_name value) }; #>>> @@ -630,7 +660,7 @@ $parsed_params{$name} = $value; } else { - my $error_msg = "Weird - $clause does not look a param"; + my $error_msg = "Weird - $clause does not look like a param"; INFO($error_msg); cluck($error_msg); $statistics{'weird param'}++; @@ -641,7 +671,7 @@ # Possible recursion find_templates($parsed_template{text}, $parsed_template{subtemplates}, - $subtmpl_filter); + $subtemplate_filter); return \%parsed_template; } @@ -651,7 +681,7 @@ return if (not defined $date_str); - if ($date_str =~ $PATTERN{heb_date}) { + if ($date_str =~ $HEB_DATE_RE) { INFO("found a valid date: $date_str"); my %parsed_date = ( 'sec' => 0, # useful for timelocal @@ -677,15 +707,15 @@ return; } -sub strip_tmpl_curlies { +sub strip_template_curlies { my ($template) = @_; for ($template) { s{ \A - \Q$MW_SYNTAX{start_tmpl}\E + \Q$MW_SYNTAX{start_template}\E }{}xms; s{ - \Q$MW_SYNTAX{end_tmpl}\E + \Q$MW_SYNTAX{end_template}\E \z }{}xms; } @@ -693,7 +723,7 @@ } # no arg unpacking for simplicity and performance -sub strip_whitespace { ## no critic Subroutines::RequireArgUnpacking +sub strip_whitespace { ## no critic (RequireArgUnpacking) for (@_) { s/\A\s*//xms; s/\s*\z//xms; @@ -705,13 +735,14 @@ my ($page) = @_; my $page_title = $page->title(); - my $page_text = ${ $page->text() }; - study $page_text; + my $page_text = ${ $page->text() }; # XXX + study $page_text; # XXX + my %iw_links; my %special_cases; - while ($page_text =~ /$PATTERN{interwiki_link}/xmsgo) { + while ($page_text =~ /$INTERWIKI_LINK_RE/xmsgo) { # XXX my ($lang_code, $foreign_article) = @+{qw(lang_code foreign_article)}; if (defined $iw_links{$lang_code}) { $special_cases{double_links}->{$lang_code} = q{}; @@ -722,31 +753,31 @@ # A # sign not after an &. # After an & it is probably a character number. 
- if ($foreign_article =~ $PATTERN{section_link}) { + if ($foreign_article =~ $SECTION_LINK_RE) { $special_cases{section_links}->{$lang_code} = q{}; } # Char codes are common in section links, so there is no # need to show them again - elsif ($foreign_article =~ $PATTERN{character_code_in_link}) { - $special_cases{charnumber_links}{$lang_code} = q{}; + elsif ($foreign_article =~ $CHARACTER_CODE_IN_LINK_RE) { + $special_cases{charnumber_links}->{$lang_code} = q{}; } # Lowercase links if ( (not $lang_code ~~ @LOWERCASE_LANGS) - and ($foreign_article =~ $PATTERN{lowercase_link})) + and ($foreign_article =~ $LOWERCASE_LINK_RE)) { my $include_lowercase_link = 1; # XXX get_string() cannot be used here if (defined $STRING{exclude_lowercase} - and $foreign_article =~ $PATTERN{exclude_lowercase}) + and $foreign_article =~ $EXCLUDE_LOWERCASE_RE) { $include_lowercase_link = 0; } if ($include_lowercase_link) { - $special_cases{lowercase_links}{$lang_code} = q{}; + $special_cases{lowercase_links}->{$lang_code} = q{}; } } @@ -801,7 +832,7 @@ return; } -sub remove_tmpl_no_iw { +sub remove_template_no_iw { my ($params) = @_; INFO( "Supposed to remove the no_iw template now, but ...\n" . 'This sub is a stub. You can help Wikipedia by expanding it!'); @@ -853,7 +884,7 @@ my ($page_title) = @_; my $sort_title; - if ($page_title =~ $PATTERN{pure_title}) { + if ($page_title =~ $PURE_TITLE_RE) { $sort_title = $+{pure_title}; } else { @@ -901,7 +932,7 @@ . $MW_SYNTAX{end_link}; if ($option{rtl}) { - if ($page_title =~ $PATTERN{ltr_char}) { + if ($page_title =~ $LTR_CHAR_RE) { # XXX get_string() cannot be used here $link_to_page = $STRING{rlm} . $link_to_page . $STRING{rlm}; @@ -930,9 +961,9 @@ chomp $type_name; foreach my $line (@lines) { chomp $line; - my ($page_title, $sort_title) = split $PATTERN{field_sep}, $line; + my ($page_title, $sort_title) = split $FIELD_SEP_RE, $line; my $sort_letter = get_sort_letter($sort_title); - $all_pages_in_type{$sort_letter} //= []; + $all_pages_in_type{$sort_letter} //= []; # / push @{ $all_pages_in_type{$sort_letter} }, $page_title; } write_sorted_pages($type_name, \%all_pages_in_type); @@ -965,8 +996,6 @@ write_page(\$page, \$type_fn, $file_number++); $section_counter = 0; - # N.B. Trying to free memory, not guaranteed - undef $page; $page = q{}; } elsif ($section_counter) { @@ -988,16 +1017,12 @@ [ @all_links_in_letter[ $first_link .. $last_link ] ]); $page .= $links; } - - # N.B. Trying to free memory, not guaranteed - undef @all_links_in_letter; } # The page may be empty at this point if ($page) { write_page(\$page, \$type_fn, $file_number++); } - undef $page; return; } @@ -1007,7 +1032,7 @@ my $pretty_file_number = sprintf '%03d', $file_number; ${$type_fn_ref} =~ s{ - $PATTERN{numbered_file} + $NUMBERED_FILE_RE } {_$pretty_file_number.$WIKITEXT_EXT}xmso; INFO("creating file ${$type_fn_ref}"); @@ -1036,7 +1061,7 @@ # Custom Unicode character property for finding characters. # The custom is to give those subroutines CamelCase names. 
-sub IsLeftToRight { ## no critic NamingConventions::ProhibitMixedCaseSubs +sub IsLeftToRight { ## no critic (Capitalization) return <<'END'; +utf8::InHebrew +utf8::IsSpace @@ -1053,7 +1078,7 @@ INFO("\nEnglish redirect: $page_title"); return 1; } - if (${$page_text_ref} =~ $PATTERN{local_redirect}) { + if (${$page_text_ref} =~ $LOCAL_REDIRECT_RE) { INFO("\nLocal redirect: $page_title"); return 1; } @@ -1093,22 +1118,20 @@ close $STRINGS_FILE or croak(file_error('closing', $STRINGS_FN, 'reading')); - my %STRING; - STRING_LINE: foreach my $next_string_line (@strings_file_lines) { # Skip blanks and comments - next STRING_LINE if ($next_string_line =~ $PATTERN{string_skip}); + next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE); chomp $next_string_line; - my ($english, $target) = split $PATTERN{field_sep}, $next_string_line; + my ($english, $target) = split $FIELD_SEP_RE, $next_string_line; # Fallback to English if no target language string was supplied $STRING{$english} = $target // $english; # / } - return %STRING; + return; } sub get_string { @@ -1125,7 +1148,7 @@ my $type_fn = "$transliterated_type.$WIKITEXT_EXT"; - $type_fn =~ s{$PATTERN{invalid_filename_char}}{-}xmsgo; + $type_fn =~ s{$INVALID_FILENAME_CHAR_RE}{-}xmsgo; my $dir = $unsorted ? $UNSORTED_DIR : $OUT_DIR; $type_fn = "$dir/$type_fn"; @@ -1138,7 +1161,7 @@ open my $file, '>>:utf8', $fn or croak(file_error('opening', $fn, 'appending')); - say {$file} ($line // q{}); + say {$file} ($line // q{}); # / close $file or croak(file_error('closing', $fn, 'appeding')); @@ -1236,11 +1259,11 @@ $first_local_article, @other_local_articles ) = @_; - $local_multi_links_ref->{$first_local_article} //= {}; + $local_multi_links_ref->{$first_local_article} //= {}; # / foreach my $other_local_article (@other_local_articles) { $local_multi_links_ref->{$first_local_article} - ->{$other_local_article} //= []; + ->{$other_local_article} //= []; # / push @{ $local_multi_links_ref->{$first_local_article} ->{$other_local_article} }, $foreign_link; } @@ -1250,7 +1273,7 @@ sub join_links { my ($links_ref, $line_end) = @_; - $line_end //= 1; # / + $line_end //= 1; # / my $link_sep = q{ } . $LINK_SEP . ($line_end ? "\n" : q{ }); return join $link_sep, @{$links_ref}; @@ -1280,10 +1303,11 @@ foreach my $namespace (keys %{ $statistics{'has no interwiki link'} }) { my $iwless_in_namespace = $statistics{'has no interwiki link'}->{$namespace}; - ## no critic ValuesAndExpressions::ProhibitMagicNumbers no integer; + ## no critic (ProhibitMagicNumbers) my $percentage = sprintf '%.2f', 100 * $iwless_in_namespace / $namespace_count{$namespace}; + ## use critic (ValuesAndExpressions::ProhibitMagicNumbers) use integer; INFO("$namespace: $iwless_in_namespace, $percentage%"); } @@ -1344,7 +1368,7 @@ =item * --stop_after=NUMBER Stops processing after page with the given NUMBER. -=item * --stop_after=NUMBER Begins processing after page with the given +=item * --start_from=NUMBER Begins processing after page with the given NUMBER. =item * --page_freq=NUMBER Print the page counter every NUMBER of @@ -1514,7 +1538,7 @@ It checks the syntax, runs perltidy on the code and runs Perl::Critic. -All the places where P::C has been disabled using "# no critic" are explained. +All the places where P::C has been disabled using "no critic" are explained. The time invested in making the code P::C-friendly will be returned as time saved on debugging. 
Also consider reading the book "Perl Best Practices" by @@ -1527,14 +1551,14 @@ This program works best on GNU/Linux, where Perl and the filesystem are Unicode-friendly. -This program was also tested on Windows XP and Vista with ActivePerl 5.10 -and Cygwin Perl 5.10. In these environments Unicode-related issues caused +This program was also tested on Windows XP and Vista with ActivePerl and +Cygwin. In these environments Unicode-related issues caused filenames and clipboard text to become jumbled. You have been warned. =head1 BUGS AND LIMITATIONS Please report all bugs, features requests and other comments to -Amir E. Aharoni (ami...@gm...). +Amir E. Aharoni (ami...@ma...). =head2 There is no equality between languages @@ -1662,4 +1686,3 @@ I<Visca la llibertat!> =cut -
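
For readers following the upgrade named in the log message, the sketch below pulls together the two patterns this revision moves toward: Readonly regular-expression constants in place of the old %PATTERN hash, and the factory-style Parse::MediaWikiDump interface ($pmwd->revisions(...)) that replaces Parse::MediaWikiDump::Pages->new(). It is a minimal illustrative sketch, not project code: the dump filename comes from @ARGV, the no-interwiki pattern is a made-up placeholder, and it assumes text() still returns a scalar reference as it does in 0.51.

    #!/usr/bin/perl
    # Minimal sketch, not part of the repository: iterate a MediaWiki dump
    # with the factory-style Parse::MediaWikiDump interface and a Readonly
    # regex constant, mirroring the style introduced in this revision.
    use 5.010;
    use strict;
    use warnings;

    use English qw(-no_match_vars);
    use Carp qw(croak);
    use Readonly;
    use Parse::MediaWikiDump;

    # Placeholder pattern; the real script builds this from localized strings.
    Readonly my $SIMPLE_NO_IW_CHECK_RE => qr{\{\{ \s* no \s+ interwiki}xmsi;

    my $dump_fn = shift @ARGV
        or croak('Dump filename must be supplied as an argument.');

    # Factory object first, then a parser handle over the dump file.
    my $pmwd = Parse::MediaWikiDump->new();
    my $dump = $pmwd->revisions($dump_fn);

    # Wrap next() in eval so a malformed page dies with a clear message,
    # like the next_page() helper added in this revision.
    sub next_page {
        my $page_ref = eval { $dump->next(); };
        croak("Failed reading a page: $EVAL_ERROR") if $EVAL_ERROR;
        return $page_ref;
    }

    while (my $page_ref = next_page()) {
        my $page_title    = $page_ref->title();
        my $page_text_ref = $page_ref->text();    # assumed to be a scalar ref

        if (${$page_text_ref} =~ $SIMPLE_NO_IW_CHECK_RE) {
            say "$page_title is tagged as having no interwiki links";
        }
    }

Funnelling every read through the small next_page() wrapper keeps the eval/$EVAL_ERROR handling in one place, which is the same design the revision applies inside find_iwless().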