[PerlWikiBot] SF.net SVN: perlwikibot:[73] trunk/no-interwiki/prepare_noiw_list.pl
Status: Pre-Alpha
Brought to you by:
rotemliss
|
From: <am...@us...> - 2009-09-08 19:20:25
|
Revision: 73
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=73&view=rev
Author: amire80
Date: 2009-09-08 19:20:10 +0000 (Tue, 08 Sep 2009)
Log Message:
-----------
cleanup before upgrading to Parse::MediaWikiDump 0.92
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2008-11-21 23:07:37 UTC (rev 72)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2009-09-08 19:20:10 UTC (rev 73)
@@ -12,7 +12,7 @@
# This program is Free Software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
-# Upgrade! This script actually uses new Perl 5.10 constructs, so you need it
+# Upgrade! This program actually uses new Perl 5.10 constructs, so you need it
use 5.010;
# pragmata
@@ -27,7 +27,6 @@
use English qw(-no_match_vars);
use Carp qw(croak cluck);
use Time::Local;
-use List::Util qw(first);
use Getopt::Long;
use Data::Dumper;
use File::Basename;
@@ -36,14 +35,18 @@
# You must install these modules from CPAN
# Versions before 0.51 have a bug with parsing the namespace of a page
-use Parse::MediaWikiDump 0.51;
+use Readonly;
use Regexp::Common;
use Lingua::Translit;
use Log::Log4perl qw(:easy);
-use Readonly;
+use Parse::MediaWikiDump 0.51; # Earlier versions have a bug in namespaces
+# XXX
+use Devel::Leak;
+use Devel::Size qw(size total_size);
+
#<<< no perltidy
-my %SVN_PROPS = ( ## no critic ValuesAndExpressions::RequireInterpolationOfMetachars
+my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars)
Revision => '$Revision$',
HeadURL => '$HeadURL$',
Date => '$Date$',
@@ -53,7 +56,6 @@
? "0.1.9.$+{revision_num}"
: croak(q(Something is wrong with SVN revision number));
-my %PATTERN;
Readonly my $WIKITEXT_EXT => 'wiki.txt';
Readonly my $OUT_DIR => 'out';
Readonly my $UNSORTED_DIR => "$OUT_DIR/unsorted";
@@ -102,15 +104,15 @@
}
# XXX Too coupled to Wikipedia, won't work for other projects.
-$PATTERN{dump_fn} = qr{
+Readonly my $DUMP_FN_RE => qr{
\A # Begin string
(?<wiki_lang>\w+) # Lang code
wiki # Is supposed to be after the lang code
}xms;
-$PATTERN{field_sep} = qr{\t}xms;
+Readonly my $FIELD_SEP_RE => qr{\t}xms;
-$PATTERN{string_skip} = qr{
+Readonly my $STRING_SKIP_RE => qr{
\A # Begin string
\s* # Zero or more spaces
(?:\#.*)? # Comment lines
@@ -122,9 +124,9 @@
or croak('Dump filename must be supplied as an argument.');
my %STRING;
my $WIKI_LANG;
-if ((basename $dump_fn) =~ $PATTERN{dump_fn}) {
+if ((basename $dump_fn) =~ $DUMP_FN_RE) {
$WIKI_LANG = $+{wiki_lang};
- %STRING = get_strings($WIKI_LANG);
+ get_strings($WIKI_LANG);
}
else {
croak("$dump_fn is a weird dump file name.");
@@ -135,8 +137,8 @@
my $TRANSLITERATOR = Lingua::Translit->new('ISO 9');
Readonly my %MW_SYNTAX => (
- 'start_tmpl' => '{{',
- 'end_tmpl' => '}}',
+ 'start_template' => '{{',
+ 'end_template' => '}}',
'start_link' => '[[',
'end_link' => ']]',
'param_sep' => q{|},
@@ -146,7 +148,7 @@
);
# This monstrosity basically says: | and optional spaces
-$PATTERN{param_sep} = qr{\s*\Q$MW_SYNTAX{param_sep}\E\s*}xms;
+Readonly my $PARAM_SEP_RE => qr{\s*\Q$MW_SYNTAX{param_sep}\E\s*}xms;
Readonly my @INCLUDE_NAMESPACES => ('article space', 'Category', 'Portal');
@@ -168,7 +170,7 @@
@REV_MONTH{@MONTHS} = (0 .. $LAST_MONTH);
# XXX Internationalize
-$PATTERN{heb_date} = qr{
+Readonly my $HEB_DATE_RE => qr{
\A # begin
(?<hour>\d{1,2}) # hour
: # :
@@ -182,7 +184,7 @@
\z # end
}xms;
-$PATTERN{param} = qr{
+Readonly my $PARAM_RE => qr{
\A # Beginning of a string
\s* # Zero or more space
(?: # No capture
@@ -194,22 +196,22 @@
}xms;
# XXX It should use get_string()
-$PATTERN{simple_no_iw_check} = qr{
+Readonly my $SIMPLE_NO_IW_CHECK_RE => qr{
\Q$STRING{no_iw}\E # The string may have spaces
}xmsi;
# A simplistic template just for testing.
# Quite possibly it is not needed anymore.
# Until i get a better regex for matching balancing {{}} ...
-$PATTERN{template} = qr{
+Readonly my $TEMPLATE_RE => qr{
\A # beginning of string
- \Q$MW_SYNTAX{start_tmpl}\E # {{
+ \Q$MW_SYNTAX{start_template}\E # {{
.+ # some chars
- \Q$MW_SYNTAX{end_tmpl}\E # }}
+ \Q$MW_SYNTAX{end_template}\E # }}
\z # end of string
}xms;
-$PATTERN{wikitable} = qr{
+Readonly my $WIKITABLE_RE => qr{
\A
\Q$MW_SYNTAX{start_wikitable}\E
}xms;
@@ -218,7 +220,7 @@
# Regular expression mostly copied from
# Parse::MediaWikiDump::page::redirect
# TODO: Try to look for the local language redirect keyword in the dump.
-$PATTERN{local_redirect} = qr{
+Readonly my $LOCAL_REDIRECT_RE => qr{
\A # Beginning of string (page)
\# # a # character
$STRING{REDIRECT} # Redirect keyword in local language
@@ -226,33 +228,44 @@
\s*:?\s*\[\[([^\]]*)\]\] # the link after the redirect
}xmsi;
-$PATTERN{ltr_char} = qr/\P{IsLeftToRight}/xms;
-$PATTERN{true_template} = qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms;
-$PATTERN{section_link} = qr{(?<!&)\#}xms;
-$PATTERN{lowercase_link} = qr{\A[[:lower:]]}xms;
+Readonly my $LTR_CHAR_RE => qr/\P{IsLeftToRight}/xms;
+Readonly my $SECTION_LINK_RE => qr{(?<!&)\#}xms;
+Readonly my $LOWERCASE_LINK_RE => qr{\A[[:lower:]]}xms;
-# XXX get_string() cannot be used here
-if ($STRING{exclude_lowercase}) {
- $PATTERN{exclude_lowercase} = qr{\A[$STRING{exclude_lowercase}]}xms;
-}
-$PATTERN{numbered_file} = qr{
+## no critic (ProhibitEscapedMetacharacters)
+Readonly my $TRUE_TEMPLATE_RE => qr/\{ $RE{balanced}{-parens=>'{}'} \}/xms;
+## use critic (ProhibitEscapedMetacharacters)
+
+# get_string() cannot be used in re
+my $string_exclude_lowercase = get_string('exclude_lowercase');
+Readonly my $EXCLUDE_LOWERCASE_RE => qr{
+ \A # Beginning of foreign article name
+ [$string_exclude_lowercase] # Character class of possibly lowercase chars
+}xms;
+Readonly my $NUMBERED_FILE_RE => qr{
(?: _ \d*)?
\.$WIKITEXT_EXT
}xms;
-$PATTERN{invalid_filename_char} = qr{[\\\n/:*?"<>|]}xms; # "
+Readonly my $INVALID_FILENAME_CHAR_RE => qr{[\\\n/:*?"<>|]}xms; # "
-# TODO: Check whether it is Neapolitan with its ''
-$PATTERN{character_code_in_link} = qr{
+my $two_digit_charnumber_re = qr{
(?:
[%.] # There are both %C4%B0 and .AA.E0
[[:xdigit:]]{2} # 2 hex digits
)
- |
+}xms;
+
+my $html_charnumber_re = qr{
(?:
&\#\d+; # stuff like Š
)
}xms;
+# TODO: Check whether it is Neapolitan with its ''
+Readonly my $CHARACTER_CODE_IN_LINK_RE => qr{
+ $two_digit_charnumber_re | $html_charnumber_re
+}xms;
+
my %LANG_CODE;
Readonly my $LANG_CODE_FN => "$WIKI_LANG.language_codes.txt";
open my $lang_code_file, '<', $LANG_CODE_FN
@@ -260,7 +273,7 @@
while (my $line = <$lang_code_file>) {
chomp $line;
my ($code, $name) = split /\t/xms, $line;
- $LANG_CODE{$code} = $name // $code;
+ $LANG_CODE{$code} = $name // $code; # /
}
close $lang_code_file
@@ -268,7 +281,7 @@
Readonly my $ALT_LANGS => join $ALT_SEP, keys %LANG_CODE;
-$PATTERN{interwiki_link} = qr{
+Readonly my $INTERWIKI_LINK_RE => qr{
\Q$MW_SYNTAX{start_link}\E
(?<lang_code>
$ALT_LANGS
@@ -282,6 +295,7 @@
# Lojban allows lowercase articles
Readonly my @LOWERCASE_LANGS => qw(jbo);
+
Readonly my $COOLING_DAYS => 120;
Readonly my $COOLING_SECONDS => $COOLING_DAYS * 24 * 60 * 60;
Readonly my $LATEST_COOLING => time - $COOLING_SECONDS;
@@ -302,11 +316,12 @@
}
}
-my $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
+my $pmwd = Parse::MediaWikiDump->new();
+my $dump = $pmwd->revisions($dump_fn);
my $namespaces_alt = join $ALT_SEP,
grep { length > 0 } @{ $dump->namespaces_names() };
-$PATTERN{pure_title} = qr{
+Readonly my $PURE_TITLE_RE => qr{
\A
(?:
(?:$namespaces_alt)
@@ -359,12 +374,27 @@
return $page->namespace() || get_string('article space');
}
+sub next_page {
+ my $page_ref = eval { $dump->next(); };
+ if ($EVAL_ERROR) {
+ confess("Failed reading a page: $EVAL_ERROR");
+ }
+ return $page_ref;
+}
+
sub find_iwless {
+ my $leak_handle;
+ my $leak_count;
+
PAGE:
- while (my $page = $dump->page()) {
+ while (my $page_ref = next_page()) {
$page_counter++;
if ($page_counter % $option{page_freq} == 0) {
say $page_counter;
+ # my $lead_count = Devel::Leak::NoteSV($leak_handle);
+ # say "leak count: $lead_count";
+ # say 'Devel::Size size of buck dump: ', total_size($dump);
+ # say 'Devel::Size total_size of buck page_ref: ', total_size($page_ref);
}
last PAGE
@@ -374,45 +404,45 @@
next PAGE
if ($page_counter < $option{start_from});
- my $namespace = namespace($page);
- my $page_title = $page->title();
+ my $page_namespace = namespace($page_ref);
+ my $page_title = $page_ref->title();
# Skipping cases:
next PAGE
if (
- not is_in_namespace($page, @INCLUDE_NAMESPACES)
- or is_redirect($page)
+ not is_in_namespace($page_ref, @INCLUDE_NAMESPACES)
+ or is_redirect($page_ref)
# TODO: Be more precise here.
# Portal pages which have a '/' in their name are probably
# internal and do not need interwiki links.
- or (is_in_namespace($page, 'Portal') and $page_title =~ m{/}xms)
+ or (is_in_namespace($page_ref, 'Portal')
+ and $page_title =~ m{/}xms)
);
- $namespace_count{$namespace}++;
+ $namespace_count{$page_namespace}++;
INFO("\n* processing $page_counter - ", $page_title);
- my $page_text_ref = $page->text();
+ my $page_text_ref = $page_ref->text();
# A simple sanity check: is the no_iw template anywhere around here?
- my $has_tmpl_no_iw =
- (${$page_text_ref} =~ $PATTERN{simple_no_iw_check});
+ my $has_template_no_iw = ($page_text_ref =~ $SIMPLE_NO_IW_CHECK_RE);
# Does the page have interwiki links?
# BIG XXX Actually checks only for English
- my $has_iw = has_interwiki($page);
+ my $has_iw = has_interwiki($page_ref);
if ($has_iw) {
INFO("has link to $has_iw");
- if ($has_tmpl_no_iw) {
+ if ($has_template_no_iw) {
INFO('has template no_iw. trying to remove ...');
- remove_tmpl_no_iw($page_text_ref);
+ remove_template_no_iw($page_text_ref);
$statistics{'has both valid interwiki and template'}++;
- special_cases_file('outdated_template', $page);
+ special_cases_file('outdated_template', $page_ref);
}
}
else { # does not have iw
- process_iwless_page($page, $has_tmpl_no_iw, $has_iw);
+ process_iwless_page($page_ref, $has_template_no_iw, $has_iw);
}
}
@@ -421,25 +451,24 @@
sub process_iwless_page {
my (
- $page, # object ref
- $has_tmpl_no_iw, # scalar bool
- $has_iw # scalar bool
+ $page_ref, # object ref
+ $has_template_no_iw, # scalar bool
+ $has_iw # scalar bool
) = @_;
INFO(q(does not have iw link.));
- $statistics{'has no interwiki link'}->{ namespace($page) }++;
+ $statistics{'has no interwiki link'}->{ namespace($page_ref) }++;
# Now we need to search for no_iw templates
# and parse their parameters - date and type
my @found_templates = ();
- my $page_text_ref = $page->text();
- my $page_title = $page->title();
+ my $page_text_ref = $page_ref->text();
# Optimized - does not start searching,
# if we already know that it is not there
- if ($has_tmpl_no_iw) {
+ if ($has_template_no_iw) {
find_templates($page_text_ref, \@found_templates,
[ get_string('no_iw') ]);
}
@@ -455,7 +484,7 @@
if ($found_templates_count > 1) {
WARN('many templates were found');
$statistics{'many templates'}++;
- special_cases_file('many_templates', $page);
+ special_cases_file('many_templates', $page_ref);
}
else {
INFO('good, found one template');
@@ -476,7 +505,7 @@
if (not defined $date_ref) {
INFO("invalid date: '$date_str'");
$statistics{'invalid date'}++;
- special_cases_file('invalid_date', $page);
+ special_cases_file('invalid_date', $page_ref);
}
elsif (cooling_date_passed($date_ref)) {
INFO('cooling date passed, updating to today ...');
@@ -494,11 +523,11 @@
}
- my @all_types = get_all_types($template->{params}->{type}, $page);
+ my @all_types = get_all_types($template->{params}->{type}, $page_ref);
foreach my $type (@all_types) {
- INFO("adding $page_title to the list as type $type");
- add_to_no_iw_list($page, $type);
+ INFO('adding ' . $page_ref->title() . " to the list as type $type");
+ add_to_no_iw_list($page_ref, $type);
$type_count{$type}++;
}
@@ -508,7 +537,7 @@
sub get_all_types {
my ($type_param, $page) = @_;
- $type_param //= q{};
+ $type_param //= q{}; # /
strip_whitespace($type_param);
my @all_types = split $TYPE_SEP, $type_param;
@@ -545,23 +574,23 @@
sub find_templates {
my (
- $page_text_ref, # string ref
+ $text_ref, # string ref
$found_templates_ref, # array ref
$filter # string array ref
) = @_;
# A reference to an array with one empty string.
# Matching against an empty string will always succeed.
- $filter //= [q{}];
+ $filter //= [q{}]; # /
# Get all highest-level matches
- my @matches = (${$page_text_ref} =~ /$PATTERN{true_template}/xmsgo);
+ my @matches = (${$text_ref} =~ /$TRUE_TEMPLATE_RE/xmsgo);
MATCH:
foreach my $next_match (@matches) {
- if ($next_match !~ $PATTERN{template}) {
+ if ($next_match !~ $TEMPLATE_RE) {
INFO(q(i thought that it is a template, but it was:));
- if ($next_match =~ $PATTERN{wikitable}) {
+ if ($next_match =~ $WIKITABLE_RE) {
INFO('a wikitable');
}
else {
@@ -574,7 +603,8 @@
foreach my $next_filter (@{$filter}) {
# N.B. - case-insensitive. Wrong, but kinda useful.
- if ($next_match =~ /\A\Q$MW_SYNTAX{'start_tmpl'}$next_filter/xmsi)
+ if ($next_match =~
+ /\A\Q$MW_SYNTAX{'start_template'}$next_filter/xmsi)
{
# N.B.: parse_template calls find_templates() recursively
@@ -592,7 +622,7 @@
my (
$template, # string ref
$default_param_names, # string array ref
- $subtmpl_filter, # string array ref
+ $subtemplate_filter, # string array ref
) = @_;
# %parsed_template:
@@ -602,16 +632,16 @@
# {subtemplates} - array ref
my (%parsed_template, %parsed_params, @clauses);
- $parsed_template{text} = strip_tmpl_curlies(${$template});
+ $parsed_template{text} = strip_template_curlies(${$template});
# First string of the split is the template name,
# the rest is the params
($parsed_template{name}, @clauses) =
- (split $PATTERN{param_sep}, ${ $parsed_template{text} });
+ (split $PARAM_SEP_RE, ${ $parsed_template{text} });
my $param_counter = 0;
foreach my $clause (@clauses) {
- if ($clause =~ $PATTERN{param}) {
+ if ($clause =~ $PARAM_RE) {
#<<< no perltidy
my ($name, $value) = @+{ qw(param_name value) };
#>>>
@@ -630,7 +660,7 @@
$parsed_params{$name} = $value;
}
else {
- my $error_msg = "Weird - $clause does not look a param";
+ my $error_msg = "Weird - $clause does not look like a param";
INFO($error_msg);
cluck($error_msg);
$statistics{'weird param'}++;
@@ -641,7 +671,7 @@
# Possible recursion
find_templates($parsed_template{text}, $parsed_template{subtemplates},
- $subtmpl_filter);
+ $subtemplate_filter);
return \%parsed_template;
}
@@ -651,7 +681,7 @@
return if (not defined $date_str);
- if ($date_str =~ $PATTERN{heb_date}) {
+ if ($date_str =~ $HEB_DATE_RE) {
INFO("found a valid date: $date_str");
my %parsed_date = (
'sec' => 0, # useful for timelocal
@@ -677,15 +707,15 @@
return;
}
-sub strip_tmpl_curlies {
+sub strip_template_curlies {
my ($template) = @_;
for ($template) {
s{
\A
- \Q$MW_SYNTAX{start_tmpl}\E
+ \Q$MW_SYNTAX{start_template}\E
}{}xms;
s{
- \Q$MW_SYNTAX{end_tmpl}\E
+ \Q$MW_SYNTAX{end_template}\E
\z
}{}xms;
}
@@ -693,7 +723,7 @@
}
# no arg unpacking for simplicity and performance
-sub strip_whitespace { ## no critic Subroutines::RequireArgUnpacking
+sub strip_whitespace { ## no critic (RequireArgUnpacking)
for (@_) {
s/\A\s*//xms;
s/\s*\z//xms;
@@ -705,13 +735,14 @@
my ($page) = @_;
my $page_title = $page->title();
- my $page_text = ${ $page->text() };
- study $page_text;
+ my $page_text = ${ $page->text() }; # XXX
+ study $page_text; # XXX
+
my %iw_links;
my %special_cases;
- while ($page_text =~ /$PATTERN{interwiki_link}/xmsgo) {
+ while ($page_text =~ /$INTERWIKI_LINK_RE/xmsgo) { # XXX
my ($lang_code, $foreign_article) = @+{qw(lang_code foreign_article)};
if (defined $iw_links{$lang_code}) {
$special_cases{double_links}->{$lang_code} = q{};
@@ -722,31 +753,31 @@
# A # sign not after an &.
# After an & it is probably a character number.
- if ($foreign_article =~ $PATTERN{section_link}) {
+ if ($foreign_article =~ $SECTION_LINK_RE) {
$special_cases{section_links}->{$lang_code} = q{};
}
# Char codes are common in section links, so there is no
# need to show them again
- elsif ($foreign_article =~ $PATTERN{character_code_in_link}) {
- $special_cases{charnumber_links}{$lang_code} = q{};
+ elsif ($foreign_article =~ $CHARACTER_CODE_IN_LINK_RE) {
+ $special_cases{charnumber_links}->{$lang_code} = q{};
}
# Lowercase links
if ( (not $lang_code ~~ @LOWERCASE_LANGS)
- and ($foreign_article =~ $PATTERN{lowercase_link}))
+ and ($foreign_article =~ $LOWERCASE_LINK_RE))
{
my $include_lowercase_link = 1;
# XXX get_string() cannot be used here
if (defined $STRING{exclude_lowercase}
- and $foreign_article =~ $PATTERN{exclude_lowercase})
+ and $foreign_article =~ $EXCLUDE_LOWERCASE_RE)
{
$include_lowercase_link = 0;
}
if ($include_lowercase_link) {
- $special_cases{lowercase_links}{$lang_code} = q{};
+ $special_cases{lowercase_links}->{$lang_code} = q{};
}
}
@@ -801,7 +832,7 @@
return;
}
-sub remove_tmpl_no_iw {
+sub remove_template_no_iw {
my ($params) = @_;
INFO( "Supposed to remove the no_iw template now, but ...\n"
. 'This sub is a stub. You can help Wikipedia by expanding it!');
@@ -853,7 +884,7 @@
my ($page_title) = @_;
my $sort_title;
- if ($page_title =~ $PATTERN{pure_title}) {
+ if ($page_title =~ $PURE_TITLE_RE) {
$sort_title = $+{pure_title};
}
else {
@@ -901,7 +932,7 @@
. $MW_SYNTAX{end_link};
if ($option{rtl}) {
- if ($page_title =~ $PATTERN{ltr_char}) {
+ if ($page_title =~ $LTR_CHAR_RE) {
# XXX get_string() cannot be used here
$link_to_page = $STRING{rlm} . $link_to_page . $STRING{rlm};
@@ -930,9 +961,9 @@
chomp $type_name;
foreach my $line (@lines) {
chomp $line;
- my ($page_title, $sort_title) = split $PATTERN{field_sep}, $line;
+ my ($page_title, $sort_title) = split $FIELD_SEP_RE, $line;
my $sort_letter = get_sort_letter($sort_title);
- $all_pages_in_type{$sort_letter} //= [];
+ $all_pages_in_type{$sort_letter} //= []; # /
push @{ $all_pages_in_type{$sort_letter} }, $page_title;
}
write_sorted_pages($type_name, \%all_pages_in_type);
@@ -965,8 +996,6 @@
write_page(\$page, \$type_fn, $file_number++);
$section_counter = 0;
- # N.B. Trying to free memory, not guaranteed
- undef $page;
$page = q{};
}
elsif ($section_counter) {
@@ -988,16 +1017,12 @@
[ @all_links_in_letter[ $first_link .. $last_link ] ]);
$page .= $links;
}
-
- # N.B. Trying to free memory, not guaranteed
- undef @all_links_in_letter;
}
# The page may be empty at this point
if ($page) {
write_page(\$page, \$type_fn, $file_number++);
}
- undef $page;
return;
}
@@ -1007,7 +1032,7 @@
my $pretty_file_number = sprintf '%03d', $file_number;
${$type_fn_ref} =~ s{
- $PATTERN{numbered_file}
+ $NUMBERED_FILE_RE
}
{_$pretty_file_number.$WIKITEXT_EXT}xmso;
INFO("creating file ${$type_fn_ref}");
@@ -1036,7 +1061,7 @@
# Custom Unicode character property for finding characters.
# The custom is to give those subroutines CamelCase names.
-sub IsLeftToRight { ## no critic NamingConventions::ProhibitMixedCaseSubs
+sub IsLeftToRight { ## no critic (Capitalization)
return <<'END';
+utf8::InHebrew
+utf8::IsSpace
@@ -1053,7 +1078,7 @@
INFO("\nEnglish redirect: $page_title");
return 1;
}
- if (${$page_text_ref} =~ $PATTERN{local_redirect}) {
+ if (${$page_text_ref} =~ $LOCAL_REDIRECT_RE) {
INFO("\nLocal redirect: $page_title");
return 1;
}
@@ -1093,22 +1118,20 @@
close $STRINGS_FILE
or croak(file_error('closing', $STRINGS_FN, 'reading'));
- my %STRING;
-
STRING_LINE:
foreach my $next_string_line (@strings_file_lines) {
# Skip blanks and comments
- next STRING_LINE if ($next_string_line =~ $PATTERN{string_skip});
+ next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE);
chomp $next_string_line;
- my ($english, $target) = split $PATTERN{field_sep}, $next_string_line;
+ my ($english, $target) = split $FIELD_SEP_RE, $next_string_line;
# Fallback to English if no target language string was supplied
$STRING{$english} = $target // $english; # /
}
- return %STRING;
+ return;
}
sub get_string {
@@ -1125,7 +1148,7 @@
my $type_fn = "$transliterated_type.$WIKITEXT_EXT";
- $type_fn =~ s{$PATTERN{invalid_filename_char}}{-}xmsgo;
+ $type_fn =~ s{$INVALID_FILENAME_CHAR_RE}{-}xmsgo;
my $dir = $unsorted ? $UNSORTED_DIR : $OUT_DIR;
$type_fn = "$dir/$type_fn";
@@ -1138,7 +1161,7 @@
open my $file, '>>:utf8', $fn
or croak(file_error('opening', $fn, 'appending'));
- say {$file} ($line // q{});
+ say {$file} ($line // q{}); # /
close $file
or croak(file_error('closing', $fn, 'appending'));
@@ -1236,11 +1259,11 @@
$first_local_article, @other_local_articles
) = @_;
- $local_multi_links_ref->{$first_local_article} //= {};
+ $local_multi_links_ref->{$first_local_article} //= {}; # /
foreach my $other_local_article (@other_local_articles) {
$local_multi_links_ref->{$first_local_article}
- ->{$other_local_article} //= [];
+ ->{$other_local_article} //= []; # /
push @{ $local_multi_links_ref->{$first_local_article}
->{$other_local_article} }, $foreign_link;
}
@@ -1250,7 +1273,7 @@
sub join_links {
my ($links_ref, $line_end) = @_;
- $line_end //= 1; # /
+ $line_end //= 1; # /
my $link_sep = q{ } . $LINK_SEP . ($line_end ? "\n" : q{ });
return join $link_sep, @{$links_ref};
@@ -1280,10 +1303,11 @@
foreach my $namespace (keys %{ $statistics{'has no interwiki link'} }) {
my $iwless_in_namespace =
$statistics{'has no interwiki link'}->{$namespace};
- ## no critic ValuesAndExpressions::ProhibitMagicNumbers
no integer;
+ ## no critic (ProhibitMagicNumbers)
my $percentage = sprintf '%.2f',
100 * $iwless_in_namespace / $namespace_count{$namespace};
+ ## use critic (ValuesAndExpressions::ProhibitMagicNumbers)
use integer;
INFO("$namespace: $iwless_in_namespace, $percentage%");
}
@@ -1344,7 +1368,7 @@
=item * --stop_after=NUMBER Stops processing after page with the given
NUMBER.
-=item * --stop_after=NUMBER Begins processing after page with the given
+=item * --start_from=NUMBER Begins processing after page with the given
NUMBER.
=item * --page_freq=NUMBER Print the page counter every NUMBER of
@@ -1514,7 +1538,7 @@
It checks the syntax, runs perltidy on the code and runs Perl::Critic.
-All the places where P::C has been disabled using "# no critic" are explained.
+All the places where P::C has been disabled using "no critic" are explained.
The time invested in making the code P::C-friendly will be returned as time
saved on debugging. Also consider reading the book "Perl Best Practices" by
@@ -1527,14 +1551,14 @@
This program works best on GNU/Linux, where Perl and the filesystem are
Unicode-friendly.
-This program was also tested on Windows XP and Vista with ActivePerl 5.10
-and Cygwin Perl 5.10. In these environments Unicode-related issues caused
+This program was also tested on Windows XP and Vista with ActivePerl and
+Cygwin. In these environments Unicode-related issues caused
filenames and clipboard text to become jumbled. You have been warned.
=head1 BUGS AND LIMITATIONS
Please report all bugs, features requests and other comments to
-Amir E. Aharoni (ami...@gm...).
+Amir E. Aharoni (ami...@ma...).
=head2 There is no equality between languages
@@ -1662,4 +1686,3 @@
I<Visca la llibertat!>
=cut
-
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|