[PerlWikiBot] SF.net SVN: perlwikibot:[87] trunk/no-interwiki
Status: Pre-Alpha
Brought to you by:
rotemliss
From: <am...@us...> - 2010-03-26 14:03:33
|
Revision: 87 http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=87&view=rev Author: amire80 Date: 2010-03-26 14:03:26 +0000 (Fri, 26 Mar 2010) Log Message: ----------- Starting very naive refactoring. Modified Paths: -------------- trunk/no-interwiki/prepare_noiw_list.pl Added Paths: ----------- trunk/no-interwiki/MediaWiki/ trunk/no-interwiki/MediaWiki/Toolkit.pm Added: trunk/no-interwiki/MediaWiki/Toolkit.pm =================================================================== --- trunk/no-interwiki/MediaWiki/Toolkit.pm (rev 0) +++ trunk/no-interwiki/MediaWiki/Toolkit.pm 2010-03-26 14:03:26 UTC (rev 87) @@ -0,0 +1,104 @@ +package MediaWiki::Toolkit; + +use 5.010; + +use strict; +use warnings; +use integer; +use utf8; +use open ':encoding(utf8)'; + +use English qw(-no_match_vars); + +use base 'Exporter'; +our %EXPORT_TAGS = ( + FIELD_SEP => [qw( $FIELD_SEP $FIELD_SEP_RE )], + GET_STRING => [qw( get_strings get_string )], + FILE_UTIL => [qw( file_error append_to_file )], +); +our @EXPORT_OK = map { @{$_} } values %EXPORT_TAGS; + +our $VERSION = '0.01'; +#<<< no perltidy +my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars) + Revision => '$Revision: 0 $', + HeadURL => '$HeadURL: https://perlwikibot.svn.sourceforge.net/svnroot/perlwikibot/trunk/no-interwiki/MediaWiki/Toolkit.pm $', + Date => '$Date: 2010-03-25 17:02:28 +0200 (Thu, 25 Mar 2010) $', +); +#>>> + +our $FIELD_SEP = qq{\t}; +our $FIELD_SEP_RE = qr{\Q$FIELD_SEP\E}xms; + +# +# This is poor man's gettext. +# TODO: Replace it with Locale::Maketext +# +{ + my $STRING_SKIP_RE = qr{ + \A # Begin string + \s* # Zero or more spaces + (?:\#.*)? # Comment lines + \z # End string +}xms; + + my %string; + + sub get_strings { + my ($lang) = @_; + + my $strings_fn = "$lang.strings.txt"; + + open my $strings_file, '<', $strings_fn + or croak(file_error('opening', $strings_fn, 'reading')); + my @strings_file_lines = <$strings_file>; + close $strings_file + or croak(file_error('closing', $strings_fn, 'reading')); + + STRING_LINE: + foreach my $next_string_line (@strings_file_lines) { + + # Skip blanks and comments + next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE); + + chomp $next_string_line; + my ($english, $target) = split $FIELD_SEP_RE, $next_string_line; + + # Fallback to English if no target language string was supplied + $string{$english} = $target // $english; # / + } + + return; + } + + sub get_string { + my ($english, $if_defined) = @_; + + return $if_defined + ? ($string{$english}) + : ($string{$english} //= $english); # / + } +} + +sub file_error { + my ($operation, $fn, $access_type) = @_; + my $string = "error $operation $fn, $access_type: $OS_ERROR"; + return $string; +} + +sub append_to_file { + my ($fn, $line) = @_; + + open my $file, '>>', $fn + or croak(file_error('opening', $fn, 'appending')); + + say {$file} ($line // q{}); # / + + close $file + or croak(file_error('closing', $fn, 'appeding')); + + return; +} + +1; + Modified: trunk/no-interwiki/prepare_noiw_list.pl =================================================================== --- trunk/no-interwiki/prepare_noiw_list.pl 2010-03-25 15:02:28 UTC (rev 86) +++ trunk/no-interwiki/prepare_noiw_list.pl 2010-03-26 14:03:26 UTC (rev 87) @@ -31,7 +31,7 @@ use strict; use warnings; use integer; # Improves performance -use open ':utf8'; +use open ':encoding(utf8)'; use utf8; use charnames ':full'; @@ -56,6 +56,12 @@ use Log::Log4perl qw(:easy); use Parse::MediaWikiDump 0.91; # Earlier versions have a different API +use MediaWiki::Toolkit ( + qw( + :FIELD_SEP + :GET_STRING :FILE_UTIL) +); + our $VERSION = '0.2.1'; #<<< no perltidy my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars) @@ -74,7 +80,6 @@ my $UNSORTED_DIR = "$OUT_DIR/unsorted"; my $MULTI_DIR = "$OUT_DIR/multilinks"; my $ALT_SEP = q{|}; -my $FIELD_SEP = qq{\t}; my $LINK_SEP = q{|}; my $TYPE_SEP = qr/\s*;\s*/xms; my $HTML_RLM = '‏'; @@ -158,28 +163,19 @@ wiki # Is supposed to be after the lang code }xms; -my $FIELD_SEP_RE = qr{\t}xms; - -my $STRING_SKIP_RE = qr{ - \A # Begin string - \s* # Zero or more spaces - (?:\#.*)? # Comment lines - \z # End string -}xms; - # pages-meta-current my $dump_fn = $ARGV[0] or croak('Dump filename must be supplied as an argument.'); -my %STRING; my $WIKI_LANG; if ((basename $dump_fn) =~ $DUMP_FN_RE) { $WIKI_LANG = $+{wiki_lang}; - get_strings($WIKI_LANG); } else { croak("$dump_fn is a weird dump file name."); } +get_strings($WIKI_LANG); + # XXX - bad i18n # ISO 9 is mostly good for Russian and it is still not perfect ASCII my $TRANSLITERATOR = Lingua::Translit->new('ISO 9'); @@ -204,14 +200,12 @@ # Constants for date processing # -my @MONTHS = @STRING{ - qw( - January February March - April May June - July August September - October November December - ) - }; +my @MONTHS = map { get_string($_) } qw( + January February March + April May June + July August September + October November December +); my $ALT_MONTHS = join $ALT_SEP, @MONTHS; my %REV_MONTH; Readonly my $LAST_MONTH => 11; @@ -1163,52 +1157,16 @@ sub is_disambig { my ($page) = @_; + my $found_templates = find_templates($page->text(), [], [ get_string('disambig') ]); + return scalar @{$found_templates}; } -# -# This is poor man's gettext. -# TODO: Replace it with Locale::Maketext -# -sub get_strings { - my ($lang) = @_; - - my $STRINGS_FN = "$lang.strings.txt"; - - open my $STRINGS_FILE, '<:utf8', $STRINGS_FN - or croak(file_error('opening', $STRINGS_FN, 'reading')); - my @strings_file_lines = <$STRINGS_FILE>; - close $STRINGS_FILE - or croak(file_error('closing', $STRINGS_FN, 'reading')); - - STRING_LINE: - foreach my $next_string_line (@strings_file_lines) { - - # Skip blanks and comments - next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE); - - chomp $next_string_line; - my ($english, $target) = split $FIELD_SEP_RE, $next_string_line; - - # Fallback to English if no target language string was supplied - $STRING{$english} = $target // $english; # / - } - - return; -} - -sub get_string { - my ($english, $if_defined) = @_; - return $if_defined - ? ($STRING{$english}) - : ($STRING{$english} //= $english); # / -} - sub make_type_fn { my ($type, $unsorted) = @_; - $unsorted //= 0; # / + $unsorted //= 0; # / #my $transliterated_type = $TRANSLITERATOR->translit($type); my $transliterated_type = $type; @@ -1222,20 +1180,6 @@ return $type_fn; } -sub append_to_file { - my ($fn, $line) = @_; - - open my $file, '>>:utf8', $fn - or croak(file_error('opening', $fn, 'appending')); - - say {$file} ($line // q{}); # / - - close $file - or croak(file_error('closing', $fn, 'appeding')); - - return; -} - # It appears simple, but in non-alphabetic languages such as Chinese # it may be different, so it will sit here ready for better i18n. sub get_sort_letter { @@ -1243,12 +1187,6 @@ return substr $string, 0, 1; } -sub file_error { - my ($operation, $fn, $access_type) = @_; - my $string = "error $operation $fn for $access_type: $OS_ERROR"; - return $string; -} - sub format_link_table { ## no critic (RequireArgUnpacking) return sprintf '%-15s %8d', @_; } @@ -1259,7 +1197,7 @@ say "processing $lang_code"; my $lang_reftype = ref $found_links{$lang_code}; if ($lang_reftype ne 'HASH') { - carp('$lang_code is $lang_reftype, not hashref!'); + carp("$lang_code is $lang_reftype, not hashref!"); next LANG_CODE; } my $filename = "$MULTI_DIR/$lang_code.$WIKITEXT_EXT"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |