[PerlWikiBot] SF.net SVN: perlwikibot:[87] trunk/no-interwiki
Status: Pre-Alpha
Brought to you by:
rotemliss
|
From: <am...@us...> - 2010-03-26 14:03:33
|
Revision: 87
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=87&view=rev
Author: amire80
Date: 2010-03-26 14:03:26 +0000 (Fri, 26 Mar 2010)
Log Message:
-----------
Starting very naive refactoring.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Added Paths:
-----------
trunk/no-interwiki/MediaWiki/
trunk/no-interwiki/MediaWiki/Toolkit.pm
Added: trunk/no-interwiki/MediaWiki/Toolkit.pm
===================================================================
--- trunk/no-interwiki/MediaWiki/Toolkit.pm (rev 0)
+++ trunk/no-interwiki/MediaWiki/Toolkit.pm 2010-03-26 14:03:26 UTC (rev 87)
@@ -0,0 +1,104 @@
+package MediaWiki::Toolkit;
+
+use 5.010;
+
+use strict;
+use warnings;
+use integer;
+use utf8;
+use open ':encoding(utf8)';
+
+use English qw(-no_match_vars);
+
+use base 'Exporter';
+our %EXPORT_TAGS = (
+ FIELD_SEP => [qw( $FIELD_SEP $FIELD_SEP_RE )],
+ GET_STRING => [qw( get_strings get_string )],
+ FILE_UTIL => [qw( file_error append_to_file )],
+);
+our @EXPORT_OK = map { @{$_} } values %EXPORT_TAGS;
+
+our $VERSION = '0.01';
+#<<< no perltidy
+my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars)
+ Revision => '$Revision: 0 $',
+ HeadURL => '$HeadURL: https://perlwikibot.svn.sourceforge.net/svnroot/perlwikibot/trunk/no-interwiki/MediaWiki/Toolkit.pm $',
+ Date => '$Date: 2010-03-25 17:02:28 +0200 (Thu, 25 Mar 2010) $',
+);
+#>>>
+
+our $FIELD_SEP = qq{\t};
+our $FIELD_SEP_RE = qr{\Q$FIELD_SEP\E}xms;
+
+#
+# This is poor man's gettext.
+# TODO: Replace it with Locale::Maketext
+#
+{
+ my $STRING_SKIP_RE = qr{
+ \A # Begin string
+ \s* # Zero or more spaces
+ (?:\#.*)? # Comment lines
+ \z # End string
+}xms;
+
+ my %string;
+
+ sub get_strings {
+ my ($lang) = @_;
+
+ my $strings_fn = "$lang.strings.txt";
+
+ open my $strings_file, '<', $strings_fn
+ or croak(file_error('opening', $strings_fn, 'reading'));
+ my @strings_file_lines = <$strings_file>;
+ close $strings_file
+ or croak(file_error('closing', $strings_fn, 'reading'));
+
+ STRING_LINE:
+ foreach my $next_string_line (@strings_file_lines) {
+
+ # Skip blanks and comments
+ next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE);
+
+ chomp $next_string_line;
+ my ($english, $target) = split $FIELD_SEP_RE, $next_string_line;
+
+ # Fallback to English if no target language string was supplied
+ $string{$english} = $target // $english; # /
+ }
+
+ return;
+ }
+
+ sub get_string {
+ my ($english, $if_defined) = @_;
+
+ return $if_defined
+ ? ($string{$english})
+ : ($string{$english} //= $english); # /
+ }
+}
+
+sub file_error {
+ my ($operation, $fn, $access_type) = @_;
+ my $string = "error $operation $fn, $access_type: $OS_ERROR";
+ return $string;
+}
+
+sub append_to_file {
+ my ($fn, $line) = @_;
+
+ open my $file, '>>', $fn
+ or croak(file_error('opening', $fn, 'appending'));
+
+ say {$file} ($line // q{}); # /
+
+ close $file
+ or croak(file_error('closing', $fn, 'appeding'));
+
+ return;
+}
+
+1;
+
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2010-03-25 15:02:28 UTC (rev 86)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2010-03-26 14:03:26 UTC (rev 87)
@@ -31,7 +31,7 @@
use strict;
use warnings;
use integer; # Improves performance
-use open ':utf8';
+use open ':encoding(utf8)';
use utf8;
use charnames ':full';
@@ -56,6 +56,12 @@
use Log::Log4perl qw(:easy);
use Parse::MediaWikiDump 0.91; # Earlier versions have a different API
+use MediaWiki::Toolkit (
+ qw(
+ :FIELD_SEP
+ :GET_STRING :FILE_UTIL)
+);
+
our $VERSION = '0.2.1';
#<<< no perltidy
my %SVN_PROPS = ( ## no critic (RequireInterpolationOfMetachars)
@@ -74,7 +80,6 @@
my $UNSORTED_DIR = "$OUT_DIR/unsorted";
my $MULTI_DIR = "$OUT_DIR/multilinks";
my $ALT_SEP = q{|};
-my $FIELD_SEP = qq{\t};
my $LINK_SEP = q{|};
my $TYPE_SEP = qr/\s*;\s*/xms;
my $HTML_RLM = '‏';
@@ -158,28 +163,19 @@
wiki # Is supposed to be after the lang code
}xms;
-my $FIELD_SEP_RE = qr{\t}xms;
-
-my $STRING_SKIP_RE = qr{
- \A # Begin string
- \s* # Zero or more spaces
- (?:\#.*)? # Comment lines
- \z # End string
-}xms;
-
# pages-meta-current
my $dump_fn = $ARGV[0]
or croak('Dump filename must be supplied as an argument.');
-my %STRING;
my $WIKI_LANG;
if ((basename $dump_fn) =~ $DUMP_FN_RE) {
$WIKI_LANG = $+{wiki_lang};
- get_strings($WIKI_LANG);
}
else {
croak("$dump_fn is a weird dump file name.");
}
+get_strings($WIKI_LANG);
+
# XXX - bad i18n
# ISO 9 is mostly good for Russian and it is still not perfect ASCII
my $TRANSLITERATOR = Lingua::Translit->new('ISO 9');
@@ -204,14 +200,12 @@
# Constants for date processing
#
-my @MONTHS = @STRING{
- qw(
- January February March
- April May June
- July August September
- October November December
- )
- };
+my @MONTHS = map { get_string($_) } qw(
+ January February March
+ April May June
+ July August September
+ October November December
+);
my $ALT_MONTHS = join $ALT_SEP, @MONTHS;
my %REV_MONTH;
Readonly my $LAST_MONTH => 11;
@@ -1163,52 +1157,16 @@
sub is_disambig {
my ($page) = @_;
+
my $found_templates =
find_templates($page->text(), [], [ get_string('disambig') ]);
+
return scalar @{$found_templates};
}
-#
-# This is poor man's gettext.
-# TODO: Replace it with Locale::Maketext
-#
-sub get_strings {
- my ($lang) = @_;
-
- my $STRINGS_FN = "$lang.strings.txt";
-
- open my $STRINGS_FILE, '<:utf8', $STRINGS_FN
- or croak(file_error('opening', $STRINGS_FN, 'reading'));
- my @strings_file_lines = <$STRINGS_FILE>;
- close $STRINGS_FILE
- or croak(file_error('closing', $STRINGS_FN, 'reading'));
-
- STRING_LINE:
- foreach my $next_string_line (@strings_file_lines) {
-
- # Skip blanks and comments
- next STRING_LINE if ($next_string_line =~ $STRING_SKIP_RE);
-
- chomp $next_string_line;
- my ($english, $target) = split $FIELD_SEP_RE, $next_string_line;
-
- # Fallback to English if no target language string was supplied
- $STRING{$english} = $target // $english; # /
- }
-
- return;
-}
-
-sub get_string {
- my ($english, $if_defined) = @_;
- return $if_defined
- ? ($STRING{$english})
- : ($STRING{$english} //= $english); # /
-}
-
sub make_type_fn {
my ($type, $unsorted) = @_;
- $unsorted //= 0; # /
+ $unsorted //= 0; # /
#my $transliterated_type = $TRANSLITERATOR->translit($type);
my $transliterated_type = $type;
@@ -1222,20 +1180,6 @@
return $type_fn;
}
-sub append_to_file {
- my ($fn, $line) = @_;
-
- open my $file, '>>:utf8', $fn
- or croak(file_error('opening', $fn, 'appending'));
-
- say {$file} ($line // q{}); # /
-
- close $file
- or croak(file_error('closing', $fn, 'appeding'));
-
- return;
-}
-
# It appears simple, but in non-alphabetic languages such as Chinese
# it may be different, so it will sit here ready for better i18n.
sub get_sort_letter {
@@ -1243,12 +1187,6 @@
return substr $string, 0, 1;
}
-sub file_error {
- my ($operation, $fn, $access_type) = @_;
- my $string = "error $operation $fn for $access_type: $OS_ERROR";
- return $string;
-}
-
sub format_link_table { ## no critic (RequireArgUnpacking)
return sprintf '%-15s %8d', @_;
}
@@ -1259,7 +1197,7 @@
say "processing $lang_code";
my $lang_reftype = ref $found_links{$lang_code};
if ($lang_reftype ne 'HASH') {
- carp('$lang_code is $lang_reftype, not hashref!');
+ carp("$lang_code is $lang_reftype, not hashref!");
next LANG_CODE;
}
my $filename = "$MULTI_DIR/$lang_code.$WIKITEXT_EXT";
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|