perlwikibot-svn Mailing List for Perl MediaWiki Robot (Page 3)
Status: Pre-Alpha
Brought to you by:
rotemliss
You can subscribe to this list here.
| 2006 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(19) |
Oct
|
Nov
|
Dec
|
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 |
Jan
|
Feb
|
Mar
(4) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2008 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(20) |
Aug
(12) |
Sep
|
Oct
|
Nov
(3) |
Dec
|
| 2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(11) |
Oct
(1) |
Nov
|
Dec
|
| 2010 |
Jan
|
Feb
|
Mar
(6) |
Apr
|
May
(1) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(2) |
Dec
|
|
From: <am...@us...> - 2008-07-28 16:26:10
|
Revision: 43
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=43&view=rev
Author: amire80
Date: 2008-07-28 16:26:20 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Adding some strings and language codes files.
Added Paths:
-----------
trunk/no-interwiki/eo.language_codes.txt
trunk/no-interwiki/eo.strings.txt
trunk/no-interwiki/he.language_codes.txt
trunk/no-interwiki/he.strings.txt
trunk/no-interwiki/oc.language_codes.txt
Added: trunk/no-interwiki/eo.language_codes.txt
===================================================================
--- trunk/no-interwiki/eo.language_codes.txt (rev 0)
+++ trunk/no-interwiki/eo.language_codes.txt 2008-07-28 16:26:20 UTC (rev 43)
@@ -0,0 +1 @@
+link he.language_codes.txt
\ No newline at end of file
Property changes on: trunk/no-interwiki/eo.language_codes.txt
___________________________________________________________________
Added: svn:special
+ *
Added: trunk/no-interwiki/eo.strings.txt
===================================================================
--- trunk/no-interwiki/eo.strings.txt (rev 0)
+++ trunk/no-interwiki/eo.strings.txt 2008-07-28 16:26:20 UTC (rev 43)
@@ -0,0 +1,34 @@
+# months
+January Januaro
+February Februaro
+March Marto
+April Aprilo
+May Majo
+June Junio
+July Julio
+August Auxgusto
+September Septembro
+October Oktobro
+November Novembro
+December Decembro
+
+in
+
+no_iw no estas interviki
+category kategorio
+disambig apartigilo
+template Sxablono
+
+date dato
+type tipo
+
+# MW specials
+REDIRECT REDIRECT
+
+# Namespaces
+User Vikipediisto
+User talk Vikipediista diskuto
+Image Dosiero
+
+# Other
+other alia
Added: trunk/no-interwiki/he.language_codes.txt
===================================================================
--- trunk/no-interwiki/he.language_codes.txt (rev 0)
+++ trunk/no-interwiki/he.language_codes.txt 2008-07-28 16:26:20 UTC (rev 43)
@@ -0,0 +1,150 @@
+en English
+de German
+fr French
+pl Polish
+ja Japanese
+it Italian
+ru Russian
+nl Dutch
+pt Portuguese
+es Spanish
+sv Swedish
+ru Russian
+zh Chinese
+no Norwegian Bokmal
+fi Finnish
+vo Volapuk
+ca Catalan
+ro Romanian
+tr Turkish
+uk Ukrainian
+eo Esperanto
+cs Czech
+hu Hungarian
+sk Slovak
+da Danish
+id Indonesian
+he Hebrew
+lt Lithuanian
+sr Serbian
+sl Slovenian
+ko Korean
+ar Arabic
+bg Bulgarian
+et Estonian
+hr Croatian
+new Newari
+te Telugu
+vi Vietnamese
+nn Norwegian Nynorsk
+th Thai
+fa Persian
+ga Galician
+ceb Cebuano
+el Greek
+ms Malay
+simple Simple English
+eu Basque
+bpy Bishnupriya Manipuri
+bs Bosnian
+lb Luxembourgish
+is Icelandic
+ka Georgian
+sq Albanian
+la Latin
+br Breton
+hi Hindi
+az Azeri
+bn Bengali
+mk Macedonian
+mr Marathi
+sh Serbocroatian
+tl Tagalog
+cy Welsh
+io Ido
+pms Piedmontese
+lv Latvian
+su Sundanese
+ta Tamil
+jv Javanese
+nap Neapolitan
+oc Occitan
+nds Low German
+scn Sicilian
+ast Asturian
+ku Kurdish
+be Belarusian (modern)
+be-x-old Belarusian (tarashkevitsa)
+tg Tajik
+an Aragonese
+ksh Ripuarian
+fy Frisian
+vec Venetian
+roa-tara Tarantino
+cv Chuvash
+zh-yue Cantonese
+ur Urdu
+qu Quechua
+sw Swahili
+uz Uzbek
+bat-smg Samogitian
+ga Irish Gaelic
+mi Maori
+ml Malayalam
+gd Scottish Gaelic
+yo Yoruba
+co Corsican
+kn Kannada
+pam Kapampangan
+yi Yiddish
+hsb Upper Sorbian
+nah Nahuatl
+ia Interlingua
+li Limburg
+sa Sanskrit
+hy Armenian
+als Alemannic
+tt Tatar
+roa-rup Aromanian
+map-bms Banyumasan
+pag Pangasinan
+am Amharic
+zh-min-nan Min Nan
+nrm Norman
+wuu Wuu
+fo Faroese
+vls West Flemish
+lmo Lombard
+nds-nl Dutch Low Saxon
+se Northern Sami
+rm Romansh
+ne Nepali
+war Waray-Waray
+fur Friulian
+lij Ligurian
+nov Novial
+sco Scots
+bh Bihari
+dv Divehi
+pi Pali
+diq Zazaki
+ilo Ilokano
+kk Kazakh
+os Ossetian
+zh-classical Classical Chinese
+frp Franco Provencal
+mt Maltese
+lad Ladino
+fiu-vro Voro
+pdc Pennsylvania German
+csb Kashubian
+kw Cornish
+bar Bavarian
+to Tongan
+haw Hawaii
+mn Mongolian
+ps Pashto
+km Khmer
+gv Manx
+tk Turkmen
+ln Lingala
Added: trunk/no-interwiki/he.strings.txt
===================================================================
--- trunk/no-interwiki/he.strings.txt (rev 0)
+++ trunk/no-interwiki/he.strings.txt 2008-07-28 16:26:20 UTC (rev 43)
@@ -0,0 +1,37 @@
+# months
+January ינואר
+February פברואר
+March מרץ
+April אפריל
+May מאי
+June יוני
+July יולי
+August אוגוסט
+September ספטמבר
+October אוקטובר
+November נובמבר
+December דצמבר
+
+in ב
+
+no_iw אין בינוויקי
+category קטגוריה
+disambig פירושונים
+template תבנית
+
+date תאריך
+type סוג
+
+# MW specials
+REDIRECT הפניה
+
+# Namespaces
+User משתמש
+User talk שיחת משתמש
+Image תמונה
+
+# Other
+other אחר
+rlm {{כ}}
+exclude_lowercase ß
+
Added: trunk/no-interwiki/oc.language_codes.txt
===================================================================
--- trunk/no-interwiki/oc.language_codes.txt (rev 0)
+++ trunk/no-interwiki/oc.language_codes.txt 2008-07-28 16:26:20 UTC (rev 43)
@@ -0,0 +1 @@
+link he.language_codes.txt
\ No newline at end of file
Property changes on: trunk/no-interwiki/oc.language_codes.txt
___________________________________________________________________
Added: svn:special
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <am...@us...> - 2008-07-28 16:15:55
|
Revision: 42
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=42&view=rev
Author: amire80
Date: 2008-07-28 16:16:05 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Adding config files and tidy script. Rewrote version number code.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Added Paths:
-----------
trunk/no-interwiki/.perlcriticrc
trunk/no-interwiki/.perltidyrc
trunk/no-interwiki/tidy.sh
Added: trunk/no-interwiki/.perlcriticrc
===================================================================
--- trunk/no-interwiki/.perlcriticrc (rev 0)
+++ trunk/no-interwiki/.perlcriticrc 2008-07-28 16:16:05 UTC (rev 42)
@@ -0,0 +1,6 @@
+#[-CodeLayout::RequireTidyCode]
+#[-Miscellanea::RequireRcsKeywords]
+
+# English.pm doesn't support named capture variables (yet)
+[Variables::ProhibitPunctuationVars]
+allow = %+ $+ @+
Property changes on: trunk/no-interwiki/.perlcriticrc
___________________________________________________________________
Added: svn:executable
+ *
Added: trunk/no-interwiki/.perltidyrc
===================================================================
--- trunk/no-interwiki/.perltidyrc (rev 0)
+++ trunk/no-interwiki/.perltidyrc 2008-07-28 16:16:05 UTC (rev 42)
@@ -0,0 +1,9 @@
+--backup-and-modify-in-place
+--maximum-line-length=78
+--continuation-indentation=4
+--nooutdent-long-lines
+--nooutdent-labels
+--outdent-keyword-list="next last redo goto"
+--paren-tightness=2
+--nospace-for-semicolon
+--nooutdent-long-comments
Property changes on: trunk/no-interwiki/.perltidyrc
___________________________________________________________________
Added: svn:executable
+ *
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:46:45 UTC (rev 41)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 16:16:05 UTC (rev 42)
@@ -12,10 +12,6 @@
# This program is Free Software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
-# $Revision$
-# $HeadURL$
-# $Date$
-
# Upgrade! This script actually uses new Perl 5.10 constructs, so you need it
use 5.010;
@@ -46,7 +42,16 @@
use Log::Log4perl qw(:easy);
use Readonly;
-our $VERSION = '0.1.9.02';
+#<<< no perltidy
+my %SVN_PROPS = ( ## no critic ValuesAndExpressions::RequireInterpolationOfMetachars
+ Revision => '$Revision$',
+ HeadURL => '$HeadURL$',
+ Date => '$Date$',
+);
+#>>>
+our $VERSION = ($SVN_PROPS{Revision} =~ /\A\$Revision:\ (?<revision_num>\d+)\ \$\z/xms)
+ ? "0.1.$+{revision_num}"
+ : croak(q(Something's wrong with SVN revision number));
my %PATTERN;
Readonly my $WIKITEXT_EXT => 'wiki.txt';
Added: trunk/no-interwiki/tidy.sh
===================================================================
--- trunk/no-interwiki/tidy.sh (rev 0)
+++ trunk/no-interwiki/tidy.sh 2008-07-28 16:16:05 UTC (rev 42)
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+FN=prepare_noiw_list.pl
+
+echo checking syntax
+perl -c $FN
+if [ $? -ne 0 ]; then
+ exit 1
+fi
+
+echo tidying
+perltidy $FN
+if [ $? -ne 0 ]; then
+ exit 1
+fi
+
+diff $FN.bak ${FN}
+if [ $? -eq 2 ]; then
+ exit 1
+fi
+
+echo criticizing
+/usr/local/bin/perlcritic -brutal $FN
+
+exit $?
+
Property changes on: trunk/no-interwiki/tidy.sh
___________________________________________________________________
Added: svn:executable
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <am...@us...> - 2008-07-28 15:46:36
|
Revision: 41
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=41&view=rev
Author: amire80
Date: 2008-07-28 15:46:45 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Set all properties correctly.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Property Changed:
----------------
trunk/no-interwiki/prepare_noiw_list.pl
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:44:59 UTC (rev 40)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:46:45 UTC (rev 41)
@@ -12,6 +12,10 @@
# This program is Free Software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
+# $Revision$
+# $HeadURL$
+# $Date$
+
# Upgrade! This script actually uses new Perl 5.10 constructs, so you need it
use 5.010;
@@ -1165,12 +1169,6 @@
return join $link_sep, @{$links_ref};
}
-# These are just dummies. One day it will sit in a real RCS.
-# Until then it's here to make Perl::Critic happy.
-# $Revision: 0.2 $
-# $HeadURL: http://en.wikipedia.org/wiki/Wikipedia:WikiProject_Interlanguage_Links/Ideas_from_the_Hebrew_Wikipedia/prepare_noiw_list.pl $
-# $Date$
-
__END__
=head1 NAME
Property changes on: trunk/no-interwiki/prepare_noiw_list.pl
___________________________________________________________________
Modified: svn:keywords
- Date
+ Date HeadURL Revision
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <am...@us...> - 2008-07-28 15:44:50
|
Revision: 40
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=40&view=rev
Author: amire80
Date: 2008-07-28 15:44:59 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Moved Readonly to correct section.
Modified Paths:
--------------
trunk/no-interwiki/prepare_noiw_list.pl
Property Changed:
----------------
trunk/no-interwiki/prepare_noiw_list.pl
Modified: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:28:06 UTC (rev 39)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:44:59 UTC (rev 40)
@@ -31,7 +31,6 @@
use Getopt::Long;
use Data::Dumper;
use File::Basename;
-use Readonly;
# CPAN
# You must install these modules from CPAN
@@ -41,6 +40,7 @@
use Regexp::Common;
use Lingua::Translit;
use Log::Log4perl qw(:easy);
+use Readonly;
our $VERSION = '0.1.9.02';
@@ -1169,7 +1169,7 @@
# Until then it's here to make Perl::Critic happy.
# $Revision: 0.2 $
# $HeadURL: http://en.wikipedia.org/wiki/Wikipedia:WikiProject_Interlanguage_Links/Ideas_from_the_Hebrew_Wikipedia/prepare_noiw_list.pl $
-# $Date: 2008-09-25 16:42:44 +0200 (Tue, 25 Sep 2007) $
+# $Date$
__END__
Property changes on: trunk/no-interwiki/prepare_noiw_list.pl
___________________________________________________________________
Added: svn:keywords
+ Date
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <am...@us...> - 2008-07-28 15:28:06
|
Revision: 39
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=39&view=rev
Author: amire80
Date: 2008-07-28 15:28:06 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Adding prepare_noiw_list.pl for the first time.
Added Paths:
-----------
trunk/no-interwiki/
trunk/no-interwiki/prepare_noiw_list.pl
Added: trunk/no-interwiki/prepare_noiw_list.pl
===================================================================
--- trunk/no-interwiki/prepare_noiw_list.pl (rev 0)
+++ trunk/no-interwiki/prepare_noiw_list.pl 2008-07-28 15:28:06 UTC (rev 39)
@@ -0,0 +1,1495 @@
+#!/usr/bin/perl
+
+# prepare_noiw_list.pl
+# version Noa - development
+
+# See the POD documentation at the end of the file or run
+# perldoc prepare_noiw_list.pl
+
+# "Had he been a French child,
+# he would have heard an infinite number of sentences" - Otto Jespersen
+
+# This program is Free Software; you can redistribute it and/or modify it
+# under the same terms as Perl itself.
+
+# Upgrade! This script actually uses new Perl 5.10 constructs, so you need it
+use 5.010;
+
+# pragmata
+use strict;
+use warnings;
+use integer;
+use open ':utf8';
+use utf8;
+
+# Standard library
+# These modules should come installed with Perl
+use English qw(-no_match_vars);
+use Carp qw(croak cluck);
+use Time::Local;
+use List::Util qw(first);
+use Getopt::Long;
+use Data::Dumper;
+use File::Basename;
+use Readonly;
+
+# CPAN
+# You must install these modules from CPAN
+
+# Versions before 0.51 have a bug with parsing the namespace of a page
+use Parse::MediaWikiDump 0.51;
+use Regexp::Common;
+use Lingua::Translit;
+use Log::Log4perl qw(:easy);
+
+our $VERSION = '0.1.9.02';
+
+my %PATTERN;
+Readonly my $WIKITEXT_EXT => 'wiki.txt';
+Readonly my $UNSORTED_EXT => "unsorted.$WIKITEXT_EXT";
+Readonly my $ALT_SEP => q{|};
+Readonly my $FIELD_SEP => qq{\t};
+Readonly my $LINK_SEP => q{|};
+Readonly my $TYPE_SEP => qr/\s*;\s*/xms;
+
+Readonly my $DEFAULT_MAX_IW_PLACES => 20;
+Readonly my $DEFAULT_PAGE_FREQ => 1000;
+Readonly my $DEFAULT_MAX_LINKS_PER_SECTION => 100;
+Readonly my $DEFAULT_MAX_SECTIONS_PER_PAGE => 20;
+
+# Initialize logger
+Log::Log4perl->easy_init(
+ {
+ level => $DEBUG, # print everything
+ file => ':utf8>outtest.log', # utf is important
+ layout => '%m%n', # No need to print the date
+ }
+);
+
+my %option = (
+ rtl => 0, # TODO Should identify default by lang code
+ stop_after => 0,
+ start_from => 0,
+ page_freq => $DEFAULT_PAGE_FREQ,
+ max_links_per_section => $DEFAULT_MAX_LINKS_PER_SECTION,
+ max_sections_per_page => $DEFAULT_MAX_SECTIONS_PER_PAGE,
+ max_iw_places => $DEFAULT_MAX_IW_PLACES,
+);
+
+my $valid_options = GetOptions(
+ 'rtl!' => \$option{rtl},
+ 'stop_after=i' => \$option{stop_after},
+ 'start_from=i' => \$option{start_from},
+ 'page_freq=i' => \$option{page_freq},
+ 'max_links_per_section' => \$option{max_links_per_section},
+ 'max_sections_per_page' => \$option{max_sections_per_page},
+ 'max_iw_places' => \$option{max_iw_places},
+);
+
+if (not $valid_options) {
+ croak('Invalid command line options.');
+}
+
+$PATTERN{dump_fn} = qr{
+ \A # Begin string
+ (?<wiki_lang>\w+) # Lang code
+ wiki # Is supposed to be after the lang code
+}xms;
+
+$PATTERN{field_sep} = qr{\t}xms;
+
+$PATTERN{string_skip} = qr{
+ \A # Begin string
+ \s* # Zero or more spaces
+ (?:\#.*)? # Comment lines
+ \z # End string
+}xms;
+
+# pages-meta-current
+my $dump_fn = $ARGV[0]
+ or croak('Dump filename must be supplied as an argument.');
+my %STRING;
+my $WIKI_LANG;
+if ((basename $dump_fn) =~ $PATTERN{dump_fn}) {
+ $WIKI_LANG = $+{wiki_lang};
+ %STRING = get_strings($WIKI_LANG);
+}
+else {
+ croak("$dump_fn is a weird dump file name.");
+}
+
+# XXX - bad i18n
+# ISO 9 is mostly good for Russian and it is still not perfect ASCII
+my $TRANSLITERATOR = Lingua::Translit->new('ISO 9');
+
+Readonly my %MW_SYNTAX => (
+ 'start_tmpl' => '{{',
+ 'end_tmpl' => '}}',
+ 'start_link' => '[[',
+ 'end_link' => ']]',
+ 'param_sep' => q{|},
+ 'paragraph' => qq{\n\n},
+ 'start_wikitable' => '{|',
+ 'namespace_sep' => q{:},
+);
+
+# This monstrosity basically says: | and optional spaces
+$PATTERN{param_sep} = qr{\s*\Q$MW_SYNTAX{param_sep}\E\s*}xms;
+
+Readonly my @INCLUDE_NAMESPACES => (
+ q{}, # Empty is a specific case
+ 'category',
+);
+
+#
+# Constants for date processing
+#
+
+Readonly my @MONTHS => @STRING{
+ qw(
+ January February March
+ April May June
+ July August September
+ October November December
+ )
+ };
+my $ALT_MONTHS = join $ALT_SEP, @MONTHS;
+my %REV_MONTH;
+Readonly my $LAST_MONTH => 11;
+@REV_MONTH{@MONTHS} = (0 .. $LAST_MONTH);
+
+# XXX Internationalize
+$PATTERN{heb_date} = qr{
+ \A # begin
+ (?<hour>\d{1,2}) # hour
+ : # :
+ (?<min>\d{2}),\s # minute
+ (?<mday>\d{1,2})\s # day of month
+ $STRING{in}? # This preposition appears sometimes
+ (?<mon>$ALT_MONTHS)\s # A name of a month
+ (?<year>\d+?)\s # Year
+ \([A-Z]{3}\) # Three letters in brackets - timezone
+ \z # end
+}xms;
+
+$PATTERN{param} = qr{
+ \A # Beginning of a string
+ \s* # Zero or more space
+ (?: # No capture
+ (?<param_name>\w+) # Word chars - param name
+ \s* # Zero or more space
+ = # =
+ )? # Param name is optional
+ (?<value>.*) # value
+}xms;
+
+$PATTERN{simple_no_iw_check} = qr{
+ \Q$STRING{no_iw}\E # the string may have spaces
+}xmsi;
+
+# XXX HACK Until i get a better regex for matching balancing {{}} ...
+$PATTERN{template} = qr{
+ \A # beginning of string
+ \Q$MW_SYNTAX{start_tmpl}\E # {{
+ .+ # some chars
+ \Q$MW_SYNTAX{end_tmpl}\E # }}
+ \z # end of string
+}xms;
+
+$PATTERN{wikitable} = qr{
+ \A
+ \Q$MW_SYNTAX{start_wikitable}\E
+}xms;
+
+# Redirect in local language.
+# Regular expression mostly copied from
+# Parse::MediaWikiDump::page::redirect
+# TODO: Try to look for the local language redirect keyword in the dump.
+$PATTERN{local_redirect} = qr{
+ \A # Beginning of string (page)
+ \# # a # character
+ $STRING{REDIRECT} # Redirect keyword in local language
+ \s*:?\s*\[\[([^\]]*)\]\] # the link after the redirect
+}xmsi;
+
+$PATTERN{ltr_char} = qr/\P{IsLeftToRight}/xms;
+$PATTERN{true_template} = qr{$RE{balanced}{-parens=>'{}'}}xms; # XXX very bad
+$PATTERN{section_link} = qr{(?<!&)\#}xms;
+$PATTERN{lowercase_link} = qr{\A[[:lower:]]}xms;
+if ($STRING{exclude_lowercase}) {
+ $PATTERN{exclude_lowercase} = qr{\A[$STRING{exclude_lowercase}]}xms;
+}
+$PATTERN{numbered_file} = qr{
+ (?: _ \d*)?
+ \.$WIKITEXT_EXT
+}xms;
+$PATTERN{invalid_filename_char} = qr{[\\\n/:*?"<>|]}xms; # "
+
+# TODO: Check whether it's Neapolitan with its ''
+$PATTERN{character_code_in_link} = qr{
+ (?:
+ [%.] # There are both %C4%B0 and .AA.E0
+ [[:xdigit:]]{2} # 2 hex digits
+ )
+ |
+ (?:
+ &\#\d+; # stuff like Š
+ )
+}xms;
+
+my %LANG_CODE;
+Readonly my $LANG_CODE_FN => "$WIKI_LANG.language_codes.txt";
+open my $lang_code_file, '<', $LANG_CODE_FN
+ or croak(file_error('opening', $LANG_CODE_FN, 'reading'));
+while (my $line = <$lang_code_file>) {
+ chomp $line;
+ my ($code, $name) = split /\t/xms, $line;
+ $LANG_CODE{$code} = $name;
+}
+close $lang_code_file
+ or croak(file_error('closing', $LANG_CODE_FN, 'reading'));
+
+Readonly my $ALT_LANGS => join $ALT_SEP, keys %LANG_CODE;
+
+# XXX Should use ALT_LANGS, but an efficient way is needed to update
+# lang codes list, so in the meantime it's loose.
+$PATTERN{interwiki_link} = qr{
+ \Q$MW_SYNTAX{start_link}\E
+ (?<lang_code>
+# $ALT_LANGS
+ [a-zA-Z-]+
+ )
+ :
+ (?<foreign_article>
+ .+?
+ )
+ \Q$MW_SYNTAX{end_link}\E
+}xms;
+
+# Lojban allows lowercase articles
+Readonly my @LOWERCASE_LANGS => qw(jbo);
+Readonly my $COOLING_DAYS => 120;
+Readonly my $COOLING_SECONDS => $COOLING_DAYS * 24 * 60 * 60;
+Readonly my $LATEST_COOLING => time - $COOLING_SECONDS;
+
+my %param_name;
+foreach my $english_param_name (qw(date type)) {
+ $param_name{ get_string($english_param_name) } = $english_param_name;
+}
+
+# TODO: Make smarter, configurable, whatever
+Readonly my $OUT_DIR => 'out';
+if (-d $OUT_DIR) {
+ unlink glob "$OUT_DIR/*";
+}
+else {
+ mkdir $OUT_DIR;
+}
+
+my $dump = Parse::MediaWikiDump::Pages->new($dump_fn);
+
+my $namespaces_alt = join $ALT_SEP,
+ grep { length > 0 } @{ $dump->namespaces_names() };
+$PATTERN{pure_title} = qr{
+ \A
+ (?:
+ (?:$namespaces_alt)
+ $MW_SYNTAX{namespace_sep}
+ )? # Namespace name is optional
+ (?<pure_title>.+)
+ \z
+}xms;
+
+# This is the only currently known value
+# but there could be more in the future
+if ($dump->case() ne 'first-letter') {
+ croak(q{unable to handle any case setting besides 'first-letter'});
+}
+
+my $page_counter;
+
+my %statistics;
+$statistics{count_iw} = [];
+my %namespace_count;
+my %type_count;
+my %found_links;
+
+my $begin_time = time;
+find_iwless();
+my $total_time = time - $begin_time;
+say "total time: $total_time";
+
+INFO("\nSUMMARY");
+foreach my $stat_type (sort keys %statistics) {
+ if (not ref $statistics{$stat_type}) {
+ INFO("$stat_type: $statistics{$stat_type}");
+ }
+}
+
+my $max_iw_index = $#{ $statistics{count_iw} };
+INFO("max_iw_index: $max_iw_index");
+for my $max_iw_place (0 .. $option{max_iw_places}) {
+ my @links =
+ map { make_link($_) } @{ $statistics{count_iw}->[$max_iw_index] };
+ INFO("# $max_iw_index: " . join_links(\@links, 0));
+ while (not defined $statistics{count_iw}->[ --$max_iw_index ]) {
+
+ # Do nothing, just count down to the next index with a defined list
+ }
+}
+
+INFO("\nNAMESPACES");
+foreach my $namespace (sort keys %namespace_count) {
+ INFO("$namespace: $namespace_count{$namespace}");
+}
+INFO("\nTYPES");
+foreach my $type (sort keys %type_count) {
+ INFO("$type: $type_count{$type}");
+}
+
+create_no_iw_pages();
+
+INFO(q{});
+say 'looking for multi links';
+my $begin_multi_links_time = time;
+create_multi_links_pages();
+my $total_multi_links_time = time - $begin_multi_links_time;
+say "total multi links time: $total_multi_links_time";
+
+exit;
+
+sub find_iwless {
+ PAGE:
+ while (my $page = $dump->page()) {
+ $page_counter++;
+ if ($page_counter % $option{page_freq} == 0) {
+ say $page_counter;
+ }
+
+ last PAGE
+ if ($option{stop_after}
+ and $page_counter > $option{stop_after});
+
+ next PAGE
+ if ($page_counter < $option{start_from});
+
+ my $namespace = $page->namespace() || 'main';
+ $namespace_count{$namespace}++;
+
+ # Skipping cases:
+ next PAGE
+ if (is_redirect($page)
+ or not is_in_namespace($page, @INCLUDE_NAMESPACES));
+
+ INFO("\n* processing $page_counter - ", $page->title());
+
+ my $page_text_ref = $page->text();
+
+ # A simple sanity check: is the no_iw template anywhere around here?
+ my $has_tmpl_no_iw =
+ (${$page_text_ref} =~ $PATTERN{simple_no_iw_check});
+
+ # Does the page have interwiki links?
+ # XXX Actually checks only for English
+ my $has_iw = has_interwiki($page);
+
+ if ($has_iw) {
+ INFO("has link to $has_iw");
+ if ($has_tmpl_no_iw) {
+ INFO('has template no_iw. trying to remove ...');
+ remove_tmpl_no_iw($page_text_ref);
+ $statistics{'has both valid interwiki and template'}++;
+ }
+ }
+ else { # doesn't have iw
+ process_iwless_page($page, $has_tmpl_no_iw, $has_iw);
+ }
+ }
+
+ return;
+}
+
+sub process_iwless_page {
+ my (
+ $page, # object ref
+ $has_tmpl_no_iw, # scalar bool
+ $has_iw # scalar bool
+ ) = @_;
+
+ INFO(q(doesn't have iw link.)); # '
+ $statistics{'has no interwiki link'}++;
+
+ # Now we need to search for no_iw templates
+ # and parse their parameters - date and type
+
+ my @found_templates = ();
+
+ my $page_text_ref = $page->text();
+ my $page_title = $page->title();
+
+ # Optimized - doesn't start searching,
+ # if we already know that it's not there
+ if ($has_tmpl_no_iw) {
+ find_templates($page_text_ref, \@found_templates,
+ [ get_string('no_iw') ]);
+ }
+
+ my $found_templates_count = scalar @found_templates;
+ INFO("found templates: $found_templates_count");
+
+ my $template;
+ if ($found_templates_count) {
+
+ # Assume the first one is the right one
+ $template = $found_templates[0];
+ if ($found_templates_count > 1) {
+ WARN('many templates were found');
+ $statistics{'many templates'}++;
+ }
+ else {
+ INFO('good, found one template');
+ $statistics{'one template'}++;
+ }
+ }
+ else {
+ INFO('no templates found');
+ $statistics{'no templates found'}++;
+ }
+
+ if (defined $template) {
+ my $date_str = $template->{params}->{date};
+
+ INFO('has template no_iw. checking cooling date ... ');
+ if (not defined $date_str
+ or cooling_date_passed($date_str))
+ {
+ INFO('cooling date passed, updating to today ...');
+ update_cooling_date($page_text_ref);
+ $statistics{'cooling date passed'}++;
+ }
+ else {
+ INFO(q(cooling date didn't pass.)); # '
+ $statistics{q(cooling date didn't pass)}++; # '
+ }
+ }
+
+ my @all_types = get_all_types($template->{params}->{type}, $page);
+
+ foreach my $type (@all_types) {
+ INFO("adding $page_title to the list as type $type");
+ add_to_no_iw_list($page, $type);
+ $type_count{$type}++;
+ }
+
+ return 1;
+}
+
+sub get_all_types {
+ my ($type_param, $page) = @_;
+
+ $type_param //= q{};
+ strip_whitespace($type_param);
+
+ my @all_types = split $TYPE_SEP, $type_param;
+
+ my $page_title = $page->title();
+ if (is_category($page)) {
+ INFO("$page_title is a category");
+ push @all_types, get_string('category');
+ $statistics{'categories'}++;
+ }
+
+ if (is_disambig($page)) {
+ INFO("$page_title is a disambiguation");
+ push @all_types, get_string('disambig');
+ $statistics{'disambig'}++;
+ }
+
+ # Still nothing?
+ if (not scalar @all_types) {
+ my $other_type = get_string('other');
+ INFO("$page_title doesn't have any type, adding to $other_type");
+ @all_types = ($other_type);
+ $statistics{'automatically added to type other'}++;
+ }
+
+ return @all_types;
+}
+
+sub find_templates {
+ my (
+ $page_text_ref, # string ref
+ $found_templates_ref, # array ref
+ $filter # string array ref
+ ) = @_;
+
+ # A reference to an array with one empty string.
+ # Matching against an empty string will always succeed.
+ $filter //= [q{}];
+
+ # Get all highest-level matches
+ my @matches = (${$page_text_ref} =~ /$PATTERN{true_template}/xmsgo);
+
+ MATCH:
+ foreach my $next_match (@matches) {
+ if ($next_match !~ $PATTERN{template}) {
+ INFO(q(i thought that it's a template, but it was:)); # '
+ if ($next_match =~ $PATTERN{wikitable}) {
+ INFO('a wikitable');
+ }
+ else {
+ INFO("something else: $next_match");
+ }
+ INFO(q{});
+ next MATCH;
+ }
+
+ foreach my $next_filter (@{$filter}) {
+
+ # XXX Matches anywhere in the template.
+ # It probably should match the template name.
+ # Also - it's case-insensitive which is very wrong
+ # but kinda useful.
+ if ($next_match =~ /\Q$next_filter/xmsi) {
+
+ # N.B.: parse_template calls find_templates() recursively
+ my $parsed_template =
+ parse_template(\$next_match, [qw(date type)], $filter);
+ push @{$found_templates_ref}, $parsed_template;
+ }
+ }
+ }
+
+ return $found_templates_ref;
+}
+
+sub parse_template {
+ my (
+ $template, # string ref
+ $default_param_names, # string array ref
+ $subtmpl_filter, # string array ref
+ ) = @_;
+
+ # %parsed_template:
+ # {text} - string ref; might be big
+ # {name} - string
+ # {params} - hash ref
+ # {subtemplates} - array ref
+ my (%parsed_template, %parsed_params, @clauses);
+
+ $parsed_template{text} = strip_tmpl_curlies(${$template});
+
+ # First string of the split is the template name,
+ # the rest is the params
+ ($parsed_template{name}, @clauses) =
+ (split $PATTERN{param_sep}, ${ $parsed_template{text} });
+
+ my $param_counter = 0;
+ foreach my $clause (@clauses) {
+ if ($clause =~ $PATTERN{param}) {
+ #<<< no perltidy
+ my ($name, $value) = @+{ qw(param_name value) };
+ #>>>
+ if (defined $name) {
+ if (defined $param_name{$name}) {
+ $name = $param_name{$name};
+ }
+ }
+ else {
+
+ # Get next default param name.
+ # If a default name is not defined, just use the number
+ $name = $default_param_names->[$param_counter]
+ || $param_counter;
+ }
+ $parsed_params{$name} = $value;
+ }
+ else {
+ my $error_msg = "Weird - $clause doesn't look a param";
+ INFO($error_msg);
+ cluck($error_msg);
+ $statistics{'weird param'}++;
+ }
+ $param_counter++;
+ }
+ $parsed_template{params} = \%parsed_params;
+
+ # Possible recursion
+ find_templates($parsed_template{text}, $parsed_template{subtemplates},
+ $subtmpl_filter);
+
+ return \%parsed_template;
+}
+
+sub parse_date {
+ my ($date_str) = @_;
+
+ return if (not defined $date_str);
+
+ if ($date_str =~ $PATTERN{heb_date}) {
+ INFO("found a valid date: $date_str");
+ my %parsed_date = (
+ 'sec' => 0, # useful for timelocal
+ );
+ foreach my $date_part (qw(hour min mday mon year)) {
+ $parsed_date{$date_part} = $+{$date_part};
+ }
+ $parsed_date{mon} = $REV_MONTH{ $parsed_date{mon} };
+
+ # strip leading zeros
+ foreach (values %parsed_date) {
+ s{
+ \A
+ 0+
+ (?<number>\d+)
+ }
+ {$+{number}}xms;
+ }
+ return \%parsed_date;
+ }
+ else {
+ INFO("invalid date: $date_str");
+ }
+
+ # Returns undef for an invalid date
+ return;
+}
+
+sub strip_tmpl_curlies {
+ my ($template) = @_;
+ for ($template) {
+ s{
+ \A
+ \Q$MW_SYNTAX{start_tmpl}\E
+ }{}xms;
+ s{
+ \Q$MW_SYNTAX{end_tmpl}\E
+ \z
+ }{}xms;
+ }
+ return \$template;
+}
+
+# no arg unpacking for simplicity and performance
+sub strip_whitespace { ## no critic Subroutines::RequireArgUnpacking
+ for (@_) {
+ s/\A\s*//xms;
+ s/\s*\z//xms;
+ }
+ return;
+}
+
+sub has_interwiki {
+ my ($page) = @_;
+
+ my $page_title = $page->title();
+ my $page_text = ${ $page->text() };
+ study $page_text;
+
+ my %iw_links;
+ my %special_cases;
+
+ while ($page_text =~ /$PATTERN{interwiki_link}/xmsgo) {
+ my ($lang_code, $foreign_article) = @+{qw(lang_code foreign_article)};
+ if (defined $iw_links{$lang_code}) {
+ $special_cases{double_links}->{$lang_code} = q{};
+ }
+ else {
+ $iw_links{$lang_code} = $foreign_article;
+ }
+
+ # A # sign not after an &.
+ # After an & it's probably a character number.
+ if ($foreign_article =~ $PATTERN{section_link}) {
+ $special_cases{section_links}->{$lang_code} = q{};
+ }
+
+ # Char codes are common in section links, so there's no
+ # need to show them again
+ elsif ($foreign_article =~ $PATTERN{character_code_in_link}) {
+ $special_cases{charnumber_links}{$lang_code} = q{};
+ }
+
+ # Lowercase links
+ if ( (not $lang_code ~~ @LOWERCASE_LANGS)
+ and ($foreign_article =~ $PATTERN{lowercase_link}))
+ {
+ my $include_lowercase_link = 1;
+ if (defined $STRING{exclude_lowercase}
+ and $foreign_article =~ $PATTERN{exclude_lowercase})
+ {
+ $include_lowercase_link = 0;
+ }
+
+ if ($include_lowercase_link) {
+ $special_cases{lowercase_links}{$lang_code} = q{};
+ }
+ }
+
+ $found_links{$lang_code}->{$foreign_article}->{$page_title} = q{};
+ }
+
+ my @all_langs = keys %iw_links;
+ my $count_iw = scalar @all_langs;
+ if ($count_iw) {
+ if (not defined $statistics{count_iw}->[$count_iw]) {
+ $statistics{count_iw}->[$count_iw] = [];
+ }
+ push @{ $statistics{count_iw}->[$count_iw] }, $page_title;
+ }
+ INFO("iw link count for $page_title: $count_iw");
+
+ for my $special_case_name (keys %special_cases) {
+ if (scalar %{ $special_cases{$special_case_name} }) {
+ special_cases_file($special_case_name,
+ $special_cases{$special_case_name}, $page);
+ }
+ }
+
+ # XXX Still very stupid, but getting better
+ if (defined $iw_links{en}) {
+ return 'en';
+ }
+
+ return q{};
+}
+
+sub special_cases_file {
+ my ($special_case_name, $special_cases_ref, $page) = @_;
+ my $special_case_langs = join q{, }, sort keys %{$special_cases_ref};
+ my $special_case_fn = make_type_fn($special_case_name, 1);
+ if (not -e $special_case_fn) {
+ append_to_file($special_case_fn, $special_case_name);
+ }
+ my $page_title = $page->title();
+ my $link = make_link($page_title);
+ my $line =
+ $link
+ . " ($special_case_langs)"
+ . $FIELD_SEP
+ . get_sort_title($page_title);
+
+ append_to_file($special_case_fn, $line);
+ return;
+}
+
+sub remove_tmpl_no_iw {
+ my ($params) = @_;
+ INFO( "Supposed to remove the no_iw template now, but ...\n"
+ . 'This sub is a stub. You can help Wikipedia by expanding it!');
+
+ return 0;
+}
+
+sub cooling_date_passed {
+ my ($date_string) = @_;
+
+ # $date is a hash ref
+ my $date_ref = parse_date($date_string);
+ if (not defined $date_ref) {
+ INFO('in cooling_date_passed invalid date');
+ return 1;
+ }
+
+ my @page_times = @{$date_ref}{qw(sec min hour mday mon year)};
+ INFO("page times: @page_times");
+
+ my $pageseconds = timelocal(@page_times);
+ return $pageseconds < $LATEST_COOLING;
+}
+
+sub update_cooling_date {
+ my ($params) = @_;
+
+ INFO( "Supposed to update cooling date now, but ...\n"
+ . "This sub is a stub. You can help Wikipedia by expanding it!\n"
+ );
+
+ return 0;
+}
+
+# Just a debugging thingie
+sub print_parsed_template {
+ my ($template_ref) = @_; # hash ref
+
+ INFO('text:');
+ INFO(${ $template_ref->{text} });
+ INFO('name:');
+ INFO($template_ref->{name});
+ INFO('params:');
+ foreach my $next_param (sort keys %{ $template_ref->{params} }) {
+ INFO("$next_param: $template_ref->{params}->{$next_param}");
+ }
+
+ if (defined $template_ref->{subtemplates}) {
+ INFO("subtemplates: $template_ref->{subtemplates}");
+ }
+
+ return;
+}
+
+sub get_sort_title {
+ my ($page_title) = @_;
+
+ my $sort_title;
+ if ($page_title =~ $PATTERN{pure_title}) {
+ $sort_title = $+{pure_title};
+ }
+ else {
+ INFO( 'Something badly weird happened - '
+ . "$page_title has no pure title");
+ croak('A page has no pure title');
+ }
+
+ return $sort_title;
+}
+
+sub add_to_no_iw_list {
+ my (
+ $page, # ref
+ $page_type, # string
+ ) = @_;
+
+ my $page_title = $page->title();
+ INFO("Adding $page_title to no_iw_list");
+
+ my $sort_title = get_sort_title($page_title);
+ INFO("full title: $page_title");
+ INFO("sort title: $sort_title");
+
+ my $unsorted_type_fn = make_type_fn($page_type, 1);
+ if (not -e $unsorted_type_fn) {
+ append_to_file($unsorted_type_fn, $page_type);
+ }
+ my $link = make_link($page_title);
+ INFO("link to page: $link");
+ my $line = $link . $FIELD_SEP . $sort_title;
+
+ append_to_file($unsorted_type_fn, $line);
+
+ return;
+}
+
+sub make_link {
+ my ($page_title) = @_;
+
+ my $link_to_page =
+ $MW_SYNTAX{start_link}
+ . $MW_SYNTAX{namespace_sep}
+ . $page_title
+ . $MW_SYNTAX{end_link};
+
+ if ($option{rtl}) {
+ if ($page_title =~ $PATTERN{ltr_char}) {
+ $link_to_page = $STRING{rlm} . $link_to_page . $STRING{rlm};
+ }
+ }
+
+ return $link_to_page;
+}
+
+sub create_no_iw_pages {
+ my ($params) = @_;
+
+ INFO('creating no_iw pages');
+
+ # Run over page types
+ UNSORTED_TYPE_FN:
+ foreach my $unsorted_type_fn (glob "$OUT_DIR/*$UNSORTED_EXT") {
+ my %all_pages_in_type = ();
+ open my $unsorted_type_file, '<', $unsorted_type_fn
+ or croak(file_error('opening', $unsorted_type_fn, 'reading'));
+ my @lines = <$unsorted_type_file>;
+ close $unsorted_type_file
+ or croak(file_error('closing', $unsorted_type_fn, 'reading'));
+
+ my $type_name = shift @lines;
+ chomp $type_name;
+ foreach my $line (@lines) {
+ chomp $line;
+ my ($page_title, $sort_title) = split $PATTERN{field_sep}, $line;
+ my $sort_letter = get_sort_letter($sort_title);
+ $all_pages_in_type{$sort_letter} //= [];
+ push @{ $all_pages_in_type{$sort_letter} }, $page_title;
+ }
+ write_sorted_pages($type_name, \%all_pages_in_type);
+ }
+ return;
+}
+
+sub write_sorted_pages {
+ my ($type_name, $type_tree_ref) = @_;
+
+ my $type_fn = make_type_fn($type_name); # XXX
+
+ my $section_counter = 0;
+ my $page = q{};
+ my $file_number = 1;
+
+ LETTER:
+ foreach my $next_letter (sort keys %{$type_tree_ref}) {
+ my @all_links_in_letter =
+ sort @{ $type_tree_ref->{$next_letter} };
+
+ my $links_count = scalar @all_links_in_letter;
+ my $sections =
+ ($links_count - 1) / $option{max_links_per_section} + 1;
+ my $links_per_section = $links_count / $sections;
+
+ SECTION:
+ foreach my $section_num (1 .. $sections) {
+ if ($section_counter == $option{max_sections_per_page}) {
+ write_page(\$page, \$type_fn, $file_number++);
+ $section_counter = 0;
+ undef $page; # XXX Trying to free memory
+ $page = q{};
+ }
+ elsif ($section_counter) {
+ $page .= $MW_SYNTAX{paragraph};
+ }
+ $section_counter++;
+ my $title = $next_letter;
+ if ($sections > 1) {
+ $title .= " $section_num";
+ }
+ $page .= mw_heading($title);
+
+ my $first_link = ($section_num - 1) * $links_per_section;
+ my $last_link =
+ ($section_num == $sections)
+ ? $links_count - 1
+ : $first_link + $links_per_section - 1;
+ my $links = join_links(
+ [ @all_links_in_letter[ $first_link .. $last_link ] ]);
+ $page .= $links;
+ }
+ undef @all_links_in_letter; # XXX Trying to free memory
+ }
+
+ # The page may be empty at this point
+ if ($page) {
+ write_page(\$page, \$type_fn, $file_number++);
+ }
+ undef $page;
+
+ return;
+}
+
+sub write_page {
+ my ($page_ref, $type_fn_ref, $file_number) = @_;
+
+ my $pretty_file_number = sprintf '%03d', $file_number;
+ ${$type_fn_ref} =~ s{
+ $PATTERN{numbered_file}
+ }
+ {_$pretty_file_number.$WIKITEXT_EXT}xmso;
+ INFO("creating file ${$type_fn_ref}");
+ append_to_file(${$type_fn_ref}, ${$page_ref});
+
+ return;
+}
+
+sub mw_heading {
+ my (
+ $text, # string
+ $level # number
+ ) = @_;
+
+ $level //= 2;
+ my $level_marker = q{=} x $level;
+
+ # Line ending is mandatory
+ return "$level_marker $text $level_marker\n";
+}
+
+# Custom Unicode character property for finding characters.
+# The custom is to give those subroutines CamelCase names.
+sub IsLeftToRight { ## no critic (NamingConventions::ProhibitMixedCaseSubs)
+ return <<'END';
++utf8::InHebrew
++utf8::IsSpace
++utf8::IsPunct
+END
+}
+
+sub is_redirect {
+ my ($page) = @_;
+ my $page_title = $page->title();
+ my $page_text_ref = $page->text();
+
+ if ($page->redirect()) {
+ INFO("\nEnglish redirect: $page_title");
+ return 1;
+ }
+ if (${$page_text_ref} =~ $PATTERN{local_redirect}) {
+ INFO("\nLocal redirect: $page_title");
+ return 1;
+ }
+ return 0;
+}
+
+sub is_in_namespace {
+ my ($page, @namespaces) = @_;
+
+ return $page->namespace() ~~ [ map { get_string($_) } @namespaces ];
+}
+
+sub is_category {
+ my ($page) = @_;
+ return is_in_namespace($page, 'category');
+}
+
+sub is_disambig {
+ my ($page) = @_;
+ my $found_templates =
+ find_templates($page->text(), [], [ get_string('disambig') ]);
+ return scalar @{$found_templates};
+}
+
+#
+# This is poor man's gettext.
+# TODO: Replace it with Locale::Maketext
+#
+sub get_strings {
+ my ($lang) = @_;
+
+ my $STRINGS_FN = "$lang.strings.txt";
+
+ # TODO: Refactor or upgrade to Locale::Maketext
+ open my $STRINGS_FILE, '<:utf8', $STRINGS_FN
+ or croak(file_error('opening', $STRINGS_FN, 'reading'));
+ my @strings_file_lines = <$STRINGS_FILE>;
+ close $STRINGS_FILE
+ or croak(file_error('closing', $STRINGS_FN, 'reading'));
+
+ my %STRING;
+
+ STRING_LINE:
+ foreach my $next_string_line (@strings_file_lines) {
+
+ # Skip blanks and comments
+ next STRING_LINE if ($next_string_line =~ $PATTERN{string_skip});
+
+ chomp $next_string_line;
+ my ($english, $target) = split $PATTERN{field_sep}, $next_string_line;
+
+ # Fallback to English if no target language string was supplied
+ $STRING{$english} = $target // $english;
+ }
+
+ return %STRING;
+}
+
+sub get_string {
+ my ($english) = @_;
+ return $STRING{$english} //= $english;
+}
+
+sub make_type_fn {
+ my ($type, $unsorted) = @_;
+ $unsorted //= 0;
+
+ #my $transliterated_type = $TRANSLITERATOR->translit($type);
+ my $transliterated_type = $type;
+
+ my $ext = $unsorted ? $UNSORTED_EXT : $WIKITEXT_EXT;
+ my $type_fn = "$transliterated_type.$ext";
+
+ $type_fn =~ s{$PATTERN{invalid_filename_char}}{-}xmsgo;
+ $type_fn = "$OUT_DIR/$type_fn";
+
+ return $type_fn;
+}
+
+sub append_to_file {
+ my ($fn, $line) = @_;
+
+ open my $file, '>>:utf8', $fn
+ or croak(file_error('opening', $fn, 'appending'));
+
+ say {$file} $line;
+
+ close $file
+ or croak(file_error('closing', $fn, 'appeding'));
+
+ return;
+}
+
+# It appears simple, but non-alphabetic languages such as Chinese it must be
+# different, so it will sit here ready for better i18n.
+sub get_sort_letter {
+ my ($string) = @_;
+ return substr $string, 0, 1;
+}
+
+sub file_error {
+ my ($operation, $fn, $access_type) = @_;
+ my $string = "error $operation $fn for $access_type: $OS_ERROR";
+ return $string;
+}
+
+sub create_multi_links_pages {
+ LANG_CODE:
+ foreach my $lang_code (sort keys %found_links) {
+ my $lang_fn = "$lang_code.multi_links.txt";
+ my @foreign_articles = sort keys %{ $found_links{$lang_code} };
+ FOREIGN_ARTICLE:
+ foreach my $foreign_article (@foreign_articles) {
+ my @local_articles =
+ keys %{ $found_links{$lang_code}->{$foreign_article} };
+ if (scalar @local_articles > 1) {
+ handle_multi_link($lang_code, $foreign_article);
+ }
+ }
+ }
+
+ return;
+}
+
+sub handle_multi_link {
+ my ($lang_code, $foreign_article) = @_;
+ my $links = join q{ | }, sort map { make_link($_) }
+ keys %{ $found_links{$lang_code}->{$foreign_article} };
+
+ my $foreign_title =
+ make_link($lang_code . $MW_SYNTAX{namespace_sep} . $foreign_article);
+ INFO("* '''$foreign_title''' - $links\n");
+ return;
+}
+
+sub join_links {
+ my ($links_ref, $line_end) = @_;
+ $line_end //= 1;
+
+ my $link_sep = q{ } . $LINK_SEP . ($line_end ? "\n" : q{ });
+ return join $link_sep, @{$links_ref};
+}
+
+# These are just dummies. One day it will sit in a real RCS.
+# Until then it's here to make Perl::Critic happy.
+# $Revision: 0.2 $
+# $HeadURL: http://en.wikipedia.org/wiki/Wikipedia:WikiProject_Interlanguage_Links/Ideas_from_the_Hebrew_Wikipedia/prepare_noiw_list.pl $
+# $Date: 2008-09-25 16:42:44 +0200 (Tue, 25 Sep 2007) $
+
+__END__
+
+=head1 NAME
+
+C<prepare_noiw_list.pl> - Searches a MediaWiki dump for pages without
+interlanguage links and prepares categorized lists of those pages.
+
+=head1 VERSION
+
+This documentation refers to
+
+prepare_noiw_list.pl
+
+version 0.2 - Noa.
+
+=head1 USAGE
+
+=over
+
+=item * C<prepare_noiw_list.pl ./big-files/ruwiki-20080420-pages-meta-current.xml>
+
+=item * C<prepare_noiw_list.pl --rtl ./big-files/hewiki-20080420-pages-meta-current.xml>
+
+=back
+
+=head1 REQUIRED ARGUMENTS
+
+=over
+
+=item * MediaWiki dump file name is obligatory.
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item * --rtl makes special fixes for right-to-left languages.
+
+=item * --stop_after=NUMBER Stops processing after page with the given
+NUMBER.
+
+=item * --stop_after=NUMBER Begins processing after page with the given
+NUMBER.
+
+=item * --page_freq=NUMBER Print the page counter every NUMBER of
+pages. 1000 by default.
+
+=item * --max_links_per_section Maximum number of links per section in
+the output page. Default is 100.
+
+=item * --max_sections_per_page Maximum number of sections per output
+page. Default is 20.
+
+=back
+
+=head1 DESCRIPTION
+
+The main goal of this searching is to find pages which don't have
+interwiki (interlanguage) links to certain languages.
+
+This program scans a MediaWiki XML dump file. It searches every page for
+interwiki links and for a special hidden template, which defines to which
+types of interwiki-less pages it belongs.
+
+=over
+
+=item * If the page contains links to the defined languages and contains
+no "no interwiki" template, its processing stops.
+
+=item * If the page contains links to the defined languages and contains this
+template, it is logged, so the template can be removed. (It is planned that
+it will be removed automatically in the future.)
+
+=item * If the page contains no links to the defined languagesm but no
+template, it is automatically added to type "other".
+
+=item * If the page contains no links to the defined languages and
+a template with types, it is added to the defined types.
+
+=back
+
+Pages without links are added to nicely formatted lists
+to their type.
+
+This program also collects some information on the way about problematic
+cases - more than one "no interwiki" template, invalid templates, invalid
+or redundant interwiki links etc.
+
+You can get a dump for using this program at L<http://download.wikimedia.org/>.
+
+=head1 DIAGNOSTICS
+
+=head2 Invalid command line options.
+
+See L</"REQUIRED ARGUMENTS"> and L</"OPTIONS">.
+
+=head2 FILENAME is a weird dump file name.
+
+The dump file doesn't appear to have a standard name that appears
+at L<http://download.wikimedia.org/>.
+
+=head2 error opening FILENAME ...
+
+=head2 error closing FILENAME ...
+
+Check the corresponding error.
+
+=head2 unable to handle any case setting besides 'first-letter'
+
+Something is weird with the dump.
+
+=head2 A page has no pure title
+
+Something is particularly weird with the name of a page. The program can't
+separate its name from its namespace. It can also be a bug in this program.
+
+=head2 Some weirdness happened - STRING doesn't look a param
+
+STRING is supposed to be a parameter in a template, but it doesn't look like
+one. It could be an error in the template, and also a bug in this program
+(the parser that this program employs is rather limited).
+
+=head2 Unicode character 0xNUMBER is illegal
+
+This is a standard Perl warning. It may appear if a page or its title have
+funky Unicode characters which shouldn't be there according to the Unicode
+standard (to be more precise, according to the implementation of this
+standard in your version of perl). Most probably these characters aren't
+supposed to be in the page and should be fixed, but otherwise this issue
+is not supposed to affect the functionality of this program significantly.
+
+This was reported as a MediaWiki bug: L<https://bugzilla.wikimedia.org/show_bug.cgi?id=14600>
+
+=head1 EXIT STATUS
+
+Nothing interesting, read the log :)
+
+=head1 CONFIGURATION
+
+=head2 Language codes file
+
+The file name has the form C<LANGUAGE_CODE.language_codes.txt>, where
+LANGUAGE_CODE is the code of the wiki which is being processed. It includes
+a list of language codes which are searched, the full name of the language
+and the group to which this language belongs.
+
+=head2 Localized strings file
+
+The file name has the form C<LANGUAGE_CODE.strings.txt>, where LANGUAGE_CODE
+is the code of the wiki which is being processed. This file includes pairs
+of strings - the English name of a feature and its localized counterpart.
+
+=head2 Log
+
+The log is called outtest.log.
+
+=head1 DEPENDENCIES
+
+This module depends on these CPAN modules:
+
+=over
+
+=item * C<Parse::MediaWikiDump>
+
+This module is used for reading pages from the XML dump.
+
+=item * C<Regexp::Common>
+
+This module is used for searching templates which use balanced parenthesis,
+but pretty badly. :)
+
+=item * C<Log::Log4perl>
+
+This module provides easy global logging.
+
+=item * C<Lingua::Translit>
+
+This module is used for transliterating filenames to ASCII.
+
+=back
+
+=head1 HACKING
+
+=head2 Perl 5.10
+
+This program needs Perl 5.10. It has clean, new and useful syntax, which
+makes the programs easier to hack, maintain and debug. It's useless to try
+and run it on an older version, unless you want to waste your time
+backporting. Please upgrade your Perl installation if you still have 5.8 or
+something older.
+
+=head2 Perl Best Practices and Perl::Critic
+
+Great effort has been put into making this source code pass as cleanly as
+possible the Perl::Critic tests in the 'brutal' mode. If you modify it, do
+yourself a favor, install Perl::Critic and regularly test it using this command:
+
+perlcritic -brutal prepare_noiw_list.pl
+
+All places where P::C has been disabled using "# no critic" are explained.
+
+The time invested in making the code P::C-friendly will be returned as time
+saved on debugging. Also consider reading the book "Perl Best Practices" by
+Damian Conway if you haven't already.
+
+=head1 INCOMPATIBILITIES
+
+=head2 Unicode issues
+
+This program works best on GNU/Linux, where Perl and the filesystem are
+Unicode-friendly.
+
+This program was tested on Windows with ActivePerl 5.10 and Cygwin Perl 5.10.
+In both cases Unicode-related issues cause filenames and clipboard text
+to become jumbled.
+
+=head1 BUGS AND LIMITATIONS
+
+Please report all bugs, features requests and other comments to
+Amir E. Aharoni (ami...@gm...).
+
+=head2 English is king
+
+In the meantime this program can actually only find pages which don't have
+an interwiki link to the English Wikipedia. This is obviously not useful on
+the English Wikipedia and is conceptually problematic on other Wikipedias,
+too. This is being fixed, but it is not simple to do it Right.
+
+=head2 Internationalization is far from perfect
+
+Date handling and strings localization is very primitive. There are plans
+to upgrade it to smarter modules such as Locale::Maketext.
+
+=head2 MediaWiki parsing is ad hoc
+
+This program only does very rudimentary and ad hoc MediaWiki syntax parsing.
+
+=head2 Templates must be removed manually
+
+Templates on pages which already have the needed are not removed
+automatically. This is a stub; you can help Wikipedia by writing these
+functions!
+
+The actual reason for this is that the author doesn't want to write a bot
+that touches the live online Wikipedia until he is very sure that the rest
+of the script is very stable. Besides, manual work tends to make articles
+Wikipedia articles better! Quality is more important than quantity and speed.
+
+=head2 Cooling date
+
+The implementation of the cooling date is very rudimentary.
+
+=head2 No separation of searching and formatting
+
+There are two main function here: C<find_iwless()> and
+C<create_no_iw_pages()>. They are doing separate things and should run
+from different programs.
+
+=head1 HISTORY
+
+=over
+
+=item * B<0.2 - Noa>: Perl 5.10. Russian l10n. POD documentation. Pretty
+categories sorting. Memory usage optimization - accumulating information in
+files. More generic, but far-from-perfect handling of links to languages
+other than English. Translitetaion with Lingua::Translit. Logging with
+Log::Log4perl.
+
+=item * B<0.1 - First and unnamed Amir E. Aharoni's version>: Types
+introduced. Conceptual l10n, but only tested on he.wiki. Still en links
+only. Informative, though lo-tec logging.
+
+=item * B<0.0 - Felagund's version>: no types, he.wiki only, en links only,
+but it uploaded the lists automatically...
+
+=back
+
+=head1 AUTHOR
+
+=over
+
+=item * Creator is Nadav Perez (Felagund)
+
+=over
+
+=item * Hebrew: L<http://he.wikipedia.org/wiki/User:Felagund>
+
+=back
+
+=item * It's (roughly) based on another bot by Guy Shaked (Costello).
+
+=over
+
+=item * Hebrew: L<http://he.wikipedia.org/wiki/User:Costello>
+
+=back
+
+=item * Then Amir E. Aharoni (Amire80) came and mutilated it beyond
+recognition.
+
+=over
+
+=item * Hebrew: L<http://he.wikipedia.org/wiki/User:Amire80>
+
+=item * English: L<http://en.wikipedia.org/wiki/User:Amire80>
+
+=item * Russian: L<http://ru.wikipedia.org/wiki/User:Amire80>
+
+=back
+
+=back
+
+=head1 LICENSE AND COPYRIGHT
+
+This program is Free Software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+I<Visca la llibertat!>
+
+=cut
+
Property changes on: trunk/no-interwiki/prepare_noiw_list.pl
___________________________________________________________________
Added: svn:executable
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2008-07-28 13:35:50
|
Revision: 38
http://perlwikibot.svn.sourceforge.net/perlwikibot/?rev=38&view=rev
Author: rotemliss
Date: 2008-07-28 13:35:57 +0000 (Mon, 28 Jul 2008)
Log Message:
-----------
Make place to more modules.
Added Paths:
-----------
trunk/perlwikibot/
trunk/perlwikibot/README
trunk/perlwikibot/bot.pl
trunk/perlwikibot/config/
trunk/perlwikibot/dumps/
trunk/perlwikibot/includes/
Removed Paths:
-------------
trunk/README
trunk/bot.pl
trunk/config/
trunk/dumps/
trunk/includes/
Deleted: trunk/README
===================================================================
--- trunk/README 2007-03-24 15:51:07 UTC (rev 37)
+++ trunk/README 2008-07-28 13:35:57 UTC (rev 38)
@@ -1,13 +0,0 @@
-This is a bot which was written in Perl, for using on MediaWiki sites - especially Wikimedia Foundation projects.
-
-This bot is copyrighted under the terms of GNU GPL. It's based on original work of both Costello and Felagund (Nadav) from Hebrew Wikipedia. Authors:
-* Rotem Liss
-
-The file includes/dump.pm was downloaded from:
-http://search.cpan.org/~triddle/Parse-MediaWikiDump-0.31/
-
-You should get the XML file includes all the pages, but only their current revision. It is called pages-meta-current.xml.bz2 in http://download.wikimedia.org/, and you have to use "maintenance/dumpBackup.php --current" to create it in your own installation. Now extract it in "dumps".
-
-Now copy the files "configure.sample" and "runtime.sample" in the folder "config" to the names "configure.pm" and "runtime.pm", and configure them. You should first go to the configure file, then define the servers, then the actions, then go to the runtime file and define there what actions to execute, and most importantly - set "$sendPages" to 1, after the tests.
-
-After that, you can run the file "bot.pl", with the parameter of the site name, and the parameter "sysop" if you want to use the sysop user (if you don't use it, actions which require sysop rights won't be executed). In later times, you can edit the configure file when you want to change the actions, and always edit the run-time file to enable and disable the page sends, and to define the actions to do.
Deleted: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2007-03-24 15:51:07 UTC (rev 37)
+++ trunk/bot.pl 2008-07-28 13:35:57 UTC (rev 38)
@@ -1,59 +0,0 @@
-#!/usr/bin/perl
-
-# Code style
-use warnings;
-use strict;
-
-# Libraries
-use includes::actions;
-use includes::dump;
-use includes::functions;
-use includes::http;
-use config::configure;
-use config::runtime;
-
-# Do nothing if no server specified
-if ( !$ARGV[0] ) {
- die "No server name set - please set server name!\n";
-}
-
-my $sysop;
-if ( $ARGV[1] eq "sysop" ) {
- $sysop = 1;
-} else {
- $sysop = 0;
-}
-
-# Show notes about the sending pages configuration
-if ( $configure::sendPages == 0 ) {
- print "Note: There will be no changes in the real server. Please set sendPages to 1 to do them.\n";
-} elsif ( $configure::sendPages == 2 ) {
- print "Note: A prefix will be used when editing pages. Please set sendPages to 1 to disable that.\n";
-}
-
-# Log in to the server only if you have to do some stuff in the server
-if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- functions::login( $sysop );
-}
-
-# All the matching pages in the XML file
-my @pages = functions::getMatchingPages( $sysop );
-
-# Execute actions of all the pages
-for ( my $i = 0; $i <= $#pages; $i += 2 ) {
- # Get title and action
- my $title = $pages[$i];
- my $action = $pages[$i + 1];
- my $actionName = $configure::actions{ $action };
-
- # Execute the action
- if ( $actionName eq "replace" ) {
- functions::replaceInPage( $title, $action );
- } elsif ( $actionName eq "refresh" ) {
- functions::refreshPage( $title, $action );
- } elsif ( $actionName eq "move" ) {
- functions::movePage( $title, $action );
- } elsif ( $actionName eq "delete" ) {
- functions::deletePage( $title, $action );
- }
-}
Copied: trunk/perlwikibot/README (from rev 37, trunk/README)
===================================================================
--- trunk/perlwikibot/README (rev 0)
+++ trunk/perlwikibot/README 2008-07-28 13:35:57 UTC (rev 38)
@@ -0,0 +1,13 @@
+This is a bot which was written in Perl, for using on MediaWiki sites - especially Wikimedia Foundation projects.
+
+This bot is copyrighted under the terms of GNU GPL. It's based on original work of both Costello and Felagund (Nadav) from Hebrew Wikipedia. Authors:
+* Rotem Liss
+
+The file includes/dump.pm was downloaded from:
+http://search.cpan.org/~triddle/Parse-MediaWikiDump-0.31/
+
+You should get the XML file includes all the pages, but only their current revision. It is called pages-meta-current.xml.bz2 in http://download.wikimedia.org/, and you have to use "maintenance/dumpBackup.php --current" to create it in your own installation. Now extract it in "dumps".
+
+Now copy the files "configure.sample" and "runtime.sample" in the folder "config" to the names "configure.pm" and "runtime.pm", and configure them. You should first go to the configure file, then define the servers, then the actions, then go to the runtime file and define there what actions to execute, and most importantly - set "$sendPages" to 1, after the tests.
+
+After that, you can run the file "bot.pl", with the parameter of the site name, and the parameter "sysop" if you want to use the sysop user (if you don't use it, actions which require sysop rights won't be executed). In later times, you can edit the configure file when you want to change the actions, and always edit the run-time file to enable and disable the page sends, and to define the actions to do.
Copied: trunk/perlwikibot/bot.pl (from rev 37, trunk/bot.pl)
===================================================================
--- trunk/perlwikibot/bot.pl (rev 0)
+++ trunk/perlwikibot/bot.pl 2008-07-28 13:35:57 UTC (rev 38)
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+# Code style
+use warnings;
+use strict;
+
+# Libraries
+use includes::actions;
+use includes::dump;
+use includes::functions;
+use includes::http;
+use config::configure;
+use config::runtime;
+
+# Do nothing if no server specified
+if ( !$ARGV[0] ) {
+ die "No server name set - please set server name!\n";
+}
+
+my $sysop;
+if ( $ARGV[1] eq "sysop" ) {
+ $sysop = 1;
+} else {
+ $sysop = 0;
+}
+
+# Show notes about the sending pages configuration
+if ( $configure::sendPages == 0 ) {
+ print "Note: There will be no changes in the real server. Please set sendPages to 1 to do them.\n";
+} elsif ( $configure::sendPages == 2 ) {
+ print "Note: A prefix will be used when editing pages. Please set sendPages to 1 to disable that.\n";
+}
+
+# Log in to the server only if you have to do some stuff in the server
+if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ functions::login( $sysop );
+}
+
+# All the matching pages in the XML file
+my @pages = functions::getMatchingPages( $sysop );
+
+# Execute actions of all the pages
+for ( my $i = 0; $i <= $#pages; $i += 2 ) {
+ # Get title and action
+ my $title = $pages[$i];
+ my $action = $pages[$i + 1];
+ my $actionName = $configure::actions{ $action };
+
+ # Execute the action
+ if ( $actionName eq "replace" ) {
+ functions::replaceInPage( $title, $action );
+ } elsif ( $actionName eq "refresh" ) {
+ functions::refreshPage( $title, $action );
+ } elsif ( $actionName eq "move" ) {
+ functions::movePage( $title, $action );
+ } elsif ( $actionName eq "delete" ) {
+ functions::deletePage( $title, $action );
+ }
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2007-03-24 16:08:28
|
Revision: 37
http://svn.sourceforge.net/perlwikibot/?rev=37&view=rev
Author: rotemliss
Date: 2007-03-24 08:51:07 -0700 (Sat, 24 Mar 2007)
Log Message:
-----------
Fix libraries inclusion.
Modified Paths:
--------------
trunk/bot.pl
trunk/config/configure.sample
trunk/includes/functions.pm
trunk/includes/http.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2007-03-03 14:08:53 UTC (rev 36)
+++ trunk/bot.pl 2007-03-24 15:51:07 UTC (rev 37)
@@ -5,8 +5,6 @@
use strict;
# Libraries
-use LWP::UserAgent;
-use Time::Local;
use includes::actions;
use includes::dump;
use includes::functions;
Modified: trunk/config/configure.sample
===================================================================
--- trunk/config/configure.sample 2007-03-03 14:08:53 UTC (rev 36)
+++ trunk/config/configure.sample 2007-03-24 15:51:07 UTC (rev 37)
@@ -5,6 +5,9 @@
use warnings;
use strict;
+# Libraries
+use Time::Local;
+
# Available servers and details about them, the bot should work on
our %servers = (
"server" => "http://www.server.com",
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2007-03-03 14:08:53 UTC (rev 36)
+++ trunk/includes/functions.pm 2007-03-24 15:51:07 UTC (rev 37)
@@ -5,6 +5,9 @@
use warnings;
use strict;
+# Libraries
+use Time::Local;
+
# Get all the pages matching specific restrictions
sub getMatchingPages {
# Parameters
@@ -76,7 +79,7 @@
my @tempLastEditArray;
my $lastEdit = $page->timestamp;
$lastEdit =~ /([\d]+)-([\d]+)-([\d]+)T([\d]+):([\d]+):([\d]+)Z/;
- $lastEdit = main::timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
+ $lastEdit = timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
my $betweenTime = time() - $lastEdit;
$tempCounter = 0;
for ( my $i = 0; $i <= $#pagePossibleActions; $i++ ) {
Modified: trunk/includes/http.pm
===================================================================
--- trunk/includes/http.pm 2007-03-03 14:08:53 UTC (rev 36)
+++ trunk/includes/http.pm 2007-03-24 15:51:07 UTC (rev 37)
@@ -5,6 +5,9 @@
use warnings;
use strict;
+# Libraries
+use LWP::UserAgent;
+
# The browser we use to surf; enable cookies, and use a specific user agent
my $browser = LWP::UserAgent->new();
$browser->cookie_jar( {} );
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2007-03-03 14:08:53
|
Revision: 36
http://svn.sourceforge.net/perlwikibot/?rev=36&view=rev
Author: rotemliss
Date: 2007-03-03 06:08:53 -0800 (Sat, 03 Mar 2007)
Log Message:
-----------
Set the ignore property to allow ignored folders in 'dumps'.
Property Changed:
----------------
trunk/dumps/
Property changes on: trunk/dumps
___________________________________________________________________
Name: svn:ignore
- *.xml
*.7z
*.bz2
*.gz
*.sql
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2007-03-03 13:27:36
|
Revision: 35
http://svn.sourceforge.net/perlwikibot/?rev=35&view=rev
Author: rotemliss
Date: 2007-03-03 05:27:36 -0800 (Sat, 03 Mar 2007)
Log Message:
-----------
Use the sysop account only when 'sysop' is specified as a parameter.
Modified Paths:
--------------
trunk/README
trunk/bot.pl
trunk/includes/functions.pm
Modified: trunk/README
===================================================================
--- trunk/README 2007-03-03 12:08:10 UTC (rev 34)
+++ trunk/README 2007-03-03 13:27:36 UTC (rev 35)
@@ -10,7 +10,4 @@
Now copy the files "configure.sample" and "runtime.sample" in the folder "config" to the names "configure.pm" and "runtime.pm", and configure them. You should first go to the configure file, then define the servers, then the actions, then go to the runtime file and define there what actions to execute, and most importantly - set "$sendPages" to 1, after the tests.
-After that, you can run the file "bot.pl", with the parameter of the site name. In later times, you can edit the configure file when you want to change the actions, and always edit the run-time file to enable and disable the page sends, and to define the actions to do.
-
-Tips:
-* Don't execute both sysop and non-sysop actions, because the sysop account will be used, even if it doesn't have a bot flag.
+After that, you can run the file "bot.pl", with the parameter of the site name, and the parameter "sysop" if you want to use the sysop user (if you don't use it, actions which require sysop rights won't be executed). In later times, you can edit the configure file when you want to change the actions, and always edit the run-time file to enable and disable the page sends, and to define the actions to do.
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2007-03-03 12:08:10 UTC (rev 34)
+++ trunk/bot.pl 2007-03-03 13:27:36 UTC (rev 35)
@@ -19,6 +19,13 @@
die "No server name set - please set server name!\n";
}
+my $sysop;
+if ( $ARGV[1] eq "sysop" ) {
+ $sysop = 1;
+} else {
+ $sysop = 0;
+}
+
# Show notes about the sending pages configuration
if ( $configure::sendPages == 0 ) {
print "Note: There will be no changes in the real server. Please set sendPages to 1 to do them.\n";
@@ -28,11 +35,11 @@
# Log in to the server only if you have to do some stuff in the server
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- functions::login();
+ functions::login( $sysop );
}
# All the matching pages in the XML file
-my @pages = functions::getMatchingPages();
+my @pages = functions::getMatchingPages( $sysop );
# Execute actions of all the pages
for ( my $i = 0; $i <= $#pages; $i += 2 ) {
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2007-03-03 12:08:10 UTC (rev 34)
+++ trunk/includes/functions.pm 2007-03-03 13:27:36 UTC (rev 35)
@@ -7,6 +7,9 @@
# Get all the pages matching specific restrictions
sub getMatchingPages {
+ # Parameters
+ my $sysop = @_;
+
# Get the pages
my $pages = mwdump->new( "dumps/".$configure::xmlFiles{ $ARGV[0] } );
@@ -15,8 +18,8 @@
die "Unable to handle any case setting besides \"first-letter\".\n";
}
- # Get the executed actions of the specified server
- my @possibleActions = filterActionsList( @configure::executedActions );
+ # Get the executed actions of the specified server and user
+ my @possibleActions = filterActionsList( $sysop, @configure::executedActions );
# An array contains all the pages
my @matchingPages;
@@ -101,13 +104,13 @@
# Filter actions list to the specified script server
sub filterActionsList {
# Get parameters
- my @originalList = @_;
+ my ( $sysop, @originalList ) = @_;
# Go over the actions list, and filter the actions which are not in the current server
my @newList;
my $counter = 0;
for ( my $i = 0; $i <= $#originalList; $i++ ) {
- if ( $configure::actionServers{ $originalList[$i] } eq $ARGV[0] ) {
+ if ( $configure::actionServers{ $originalList[$i] } eq $ARGV[0] && ( $sysop == 1 || $configure::actions ne "delete" ) ) {
$newList[$counter++] = $originalList[$i];
}
}
@@ -117,23 +120,9 @@
# Log in
sub login {
- # Is sysop finally required?
- my $sysop = 0;
+ # Get parameters
+ my ( $sysop ) = @_;
- # Go through the array, and check if sysop permission is needed.
- for ( my $i = 0; $i <= $#configure::executedActions; $i++ ) {
- my $action = $configure::actions{ $configure::executedActions[$i] };
- if ( $action eq "replace" ) {
- # Continue
- } elsif ($action eq "refresh") {
- # Continue
- } elsif ($action eq "delete") {
- $sysop = 1;
- } elsif ($action eq "move") {
- # Continue
- }
- }
-
# Get user name and password
my ( $username, $password );
if ( $sysop == 1 ) {
@@ -169,9 +158,9 @@
# Set initial edit summary
my $summary = $configure::initialEditSummaries{ $ARGV[0] };
- # Groups array
- my @executedActions = filterActionsList( @configure::executedActions );
- my @bywayActions = filterActionsList( @configure::bywayActions );
+ # Groups array (assume non-sysop, as it doesn't matter for edits)
+ my @executedActions = filterActionsList( 0, @configure::executedActions );
+ my @bywayActions = filterActionsList( 0, @configure::bywayActions );
my @actions = ( @executedActions, @bywayActions );
# Replaced something at all? Flag to check
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2007-03-03 12:08:10
|
Revision: 34
http://svn.sourceforge.net/perlwikibot/?rev=34&view=rev
Author: rotemliss
Date: 2007-03-03 04:08:10 -0800 (Sat, 03 Mar 2007)
Log Message:
-----------
Typos.
Modified Paths:
--------------
trunk/includes/actions.pm
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-26 17:53:23 UTC (rev 33)
+++ trunk/includes/actions.pm 2007-03-03 12:08:10 UTC (rev 34)
@@ -84,7 +84,7 @@
}
print "\tDone!\n";
} else {
- print "Error!\nThis may be a protected page you don't have permission to edit, or it has deleted since the dump file you use created.\n";
+ print "Error!\nThis may be a protected page you don't have permission to edit, or it was deleted since the dump file you use created.\n";
}
}
@@ -125,7 +125,7 @@
}
print "\tDone!\n";
} else {
- print "Error!\nYou may not have the permission to move pages, or there may already a page (which is not just a redirection) under the new title.\n";
+ print "Error!\nYou may not have the permission to move pages, or there may already be a page (which is not just a redirection) under the new title.\n";
}
}
@@ -155,7 +155,7 @@
}
print "\tDone!\n";
} else {
- print "Error!\nYou may not have the permission to delete pages, or the page is already deleted.\n";
+ print "Error!\nYou may not have the permission to delete pages, or the page was already deleted.\n";
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-26 17:53:31
|
Revision: 33
http://svn.sourceforge.net/perlwikibot/?rev=33&view=rev
Author: rotemliss
Date: 2006-09-26 10:53:23 -0700 (Tue, 26 Sep 2006)
Log Message:
-----------
Ignore prefix when refreshing.
Modified Paths:
--------------
trunk/includes/actions.pm
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-26 17:45:37 UTC (rev 32)
+++ trunk/includes/actions.pm 2006-09-26 17:53:23 UTC (rev 33)
@@ -8,10 +8,13 @@
# Get the contents of a page
sub getEditPage {
# Get parameters
- my ( $title ) = @_;
+ my ( $title, $ignorePrefix ) = @_;
+ if ( !defined( $ignorePrefix ) ) {
+ $ignorePrefix = 0;
+ }
my ( $page, $text );
- if ( $configure::sendPages == 2 ) {
+ if ( $configure::sendPages == 2 && $ignorePrefix == 0 ) {
# Add prefix
my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
my $prefixedTitle = $serverPrefix.$title;
@@ -51,12 +54,15 @@
# Edit page
sub editPage {
# Get parameters
- my ( $title, $summary, $text, $startTime, $editTime, $editToken ) = @_;
+ my ( $title, $summary, $text, $startTime, $editTime, $editToken, $ignorePrefix ) = @_;
+ if ( !defined( $ignorePrefix ) ) {
+ $ignorePrefix = 0;
+ }
print "Editing page $title...\n";
# Add prefix if necessary
- if ( $configure::sendPages == 2 ) {
+ if ( $configure::sendPages == 2 && $ignorePrefix == 0 ) {
my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
$title = $serverPrefix.$title;
}
@@ -87,8 +93,8 @@
# Get parameters
my ( $title ) = @_;
- # Null edit
- editPage( $title, "Refreshing page", getEditPage( $title ) );
+ # Null edit, ignore prefix
+ editPage( $title, "Refreshing page", getEditPage( $title ), 1 );
}
# Move page
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-26 17:45:47
|
Revision: 32
http://svn.sourceforge.net/perlwikibot/?rev=32&view=rev
Author: rotemliss
Date: 2006-09-26 10:45:37 -0700 (Tue, 26 Sep 2006)
Log Message:
-----------
Get the edit page contents via the edit page, if not adding prefix.
Modified Paths:
--------------
trunk/includes/actions.pm
trunk/includes/functions.pm
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-16 17:49:12 UTC (rev 31)
+++ trunk/includes/actions.pm 2006-09-26 17:45:37 UTC (rev 32)
@@ -6,30 +6,33 @@
use strict;
# Get the contents of a page
-sub getPageContents {
+sub getEditPage {
# Get parameters
my ( $title ) = @_;
- # Get page contents
- return http::getPage( $title, "raw" );
-}
+ my ( $page, $text );
+ if ( $configure::sendPages == 2 ) {
+ # Add prefix
+ my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
+ my $prefixedTitle = $serverPrefix.$title;
-# Edit page
-sub editPage {
- # Get parameters
- my ( $title, $text, $summary ) = @_;
+ # Get the edit page contents
+ $page = http::getPage( $prefixedTitle, "edit" );
+
+ # Get the page text
+ $text = http::getPage( $title, "raw" );
+ } else {
+ # Get the edit page contents
+ $page = http::getPage( $title, "edit" );
- print "Editing page $title...\n";
-
- # Add prefix if necessary
- if ( $configure::sendPages == 2 ) {
- my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
- $title = $serverPrefix.$title;
+ # Get the page text
+ $page =~ /<textarea tabindex='1' accesskey="." name="wpTextbox1" id="wpTextbox1" rows='\d+'\ncols='\d+' >([^<]*)<\/textarea>/;
+ $text = $1;
+ $text =~ s/</</g;
+ $text =~ s/>/>/g;
+ $text =~ s/&/&/g;
}
- # Get the edit page contents
- my $page = http::getPage( $title, "edit" );
-
# Get the start time
$page =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
my $startTime = $1;
@@ -42,6 +45,22 @@
$page =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
my $editToken = $1;
+ return ( $text, $startTime, $editTime, $editToken );
+}
+
+# Edit page
+sub editPage {
+ # Get parameters
+ my ( $title, $summary, $text, $startTime, $editTime, $editToken ) = @_;
+
+ print "Editing page $title...\n";
+
+ # Add prefix if necessary
+ if ( $configure::sendPages == 2 ) {
+ my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
+ $title = $serverPrefix.$title;
+ }
+
# Send page
if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
@@ -69,7 +88,7 @@
my ( $title ) = @_;
# Null edit
- editPage( $title, getPageContents( $title ), "Refreshing page" );
+ editPage( $title, "Refreshing page", getEditPage( $title ) );
}
# Move page
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 17:49:12 UTC (rev 31)
+++ trunk/includes/functions.pm 2006-09-26 17:45:37 UTC (rev 32)
@@ -163,8 +163,8 @@
# Send page
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- # Get the edited page contents
- my $page = actions::getPageContents( $title );
+ # Get the edit page
+ my ( $text, $startTime, $editTime, $editToken ) = actions::getEditPage( $title );
# Set initial edit summary
my $summary = $configure::initialEditSummaries{ $ARGV[0] };
@@ -181,9 +181,9 @@
for ( my $i = 0; $i <= $#actions; $i++ ) {
my $action = $actions[$i];
my $search = $configure::texts{ $action };
- if ( $page =~ /$search/ ) {
+ if ( $text =~ /$search/ ) {
my $replace = $configure::news{ $action };
- $page =~ s/$search/$replace/g;
+ $text =~ s/$search/$replace/g;
if ( $replaced == 1 ) {
$summary = "$summary, ";
}
@@ -194,7 +194,7 @@
# Edit page only of replaced something
if ( $replaced == 1 ) {
- actions::editPage( $title, $page, $summary );
+ actions::editPage( $title, $summary, $text, $startTime, $editTime, $editToken );
}
} else {
print "Replaced in page $title.\n";
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 17:49:28
|
Revision: 31
http://svn.sourceforge.net/perlwikibot/?rev=31&view=rev
Author: rotemliss
Date: 2006-09-16 10:49:12 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Credit for the includes/dump.pm file.
Modified Paths:
--------------
trunk/README
Modified: trunk/README
===================================================================
--- trunk/README 2006-09-16 16:59:10 UTC (rev 30)
+++ trunk/README 2006-09-16 17:49:12 UTC (rev 31)
@@ -3,6 +3,9 @@
This bot is copyrighted under the terms of GNU GPL. It's based on original work of both Costello and Felagund (Nadav) from Hebrew Wikipedia. Authors:
* Rotem Liss
+The file includes/dump.pm was downloaded from:
+http://search.cpan.org/~triddle/Parse-MediaWikiDump-0.31/
+
You should get the XML file includes all the pages, but only their current revision. It is called pages-meta-current.xml.bz2 in http://download.wikimedia.org/, and you have to use "maintenance/dumpBackup.php --current" to create it in your own installation. Now extract it in "dumps".
Now copy the files "configure.sample" and "runtime.sample" in the folder "config" to the names "configure.pm" and "runtime.pm", and configure them. You should first go to the configure file, then define the servers, then the actions, then go to the runtime file and define there what actions to execute, and most importantly - set "$sendPages" to 1, after the tests.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 16:59:16
|
Revision: 30
http://svn.sourceforge.net/perlwikibot/?rev=30&view=rev
Author: rotemliss
Date: 2006-09-16 09:59:10 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Deleting the old, obsolete files. They should not be used anymore, and all their features are currently exist in the new bot.
Removed Paths:
-------------
trunk/old/
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 16:58:14
|
Revision: 29
http://svn.sourceforge.net/perlwikibot/?rev=29&view=rev
Author: rotemliss
Date: 2006-09-16 09:58:08 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Removing a notice in the readme - the files are now more-or-less organised.
Modified Paths:
--------------
trunk/README
Modified: trunk/README
===================================================================
--- trunk/README 2006-09-16 15:15:32 UTC (rev 28)
+++ trunk/README 2006-09-16 16:58:08 UTC (rev 29)
@@ -1,7 +1,3 @@
-This bot files are currently not organised. Generally, use it only for testing.
-
-----
-
This is a bot which was written in Perl, for using on MediaWiki sites - especially Wikimedia Foundation projects.
This bot is copyrighted under the terms of GNU GPL. It's based on original work of both Costello and Felagund (Nadav) from Hebrew Wikipedia. Authors:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 15:15:39
|
Revision: 28
http://svn.sourceforge.net/perlwikibot/?rev=28&view=rev
Author: rotemliss
Date: 2006-09-16 08:15:32 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
General English, not Hebrew.
Modified Paths:
--------------
trunk/config/configure.sample
Modified: trunk/config/configure.sample
===================================================================
--- trunk/config/configure.sample 2006-09-16 15:12:13 UTC (rev 27)
+++ trunk/config/configure.sample 2006-09-16 15:15:32 UTC (rev 28)
@@ -42,16 +42,16 @@
"action" => "server",
); # Which server should it be executed on?
our %titles = (
- "action" => "^שיחת משתמש:[\\d]+\\.[\\d]+\\.[\\d]+\\.[\\d]+\$",
+ "action" => "^User_talk:[\\d]+\\.[\\d]+\\.[\\d]+\\.[\\d]+\$",
); # Restrict the actions to these titles
our %texts = (
- "action" => "^אנונימי\$",
+ "action" => "^Anonymous\$",
); # Restrict the actions to the pages
our %news = (
- "action" => "שיחת משתמש:אנונימי",
+ "action" => "User_talk:Anonymous",
); # New title (move) or text (replace), leave blank otherwise
our %reasons = (
- "action" => "מחליף שיחות אנונימיות",
+ "action" => "Anonymous talk changer",
); # Summary/reason sent about the action
our %minimumTimes = (
"action" => timegm( 0, 0, 0, 1, 1 - 1, 0 + 70 ),
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 15:12:22
|
Revision: 27
http://svn.sourceforge.net/perlwikibot/?rev=27&view=rev
Author: rotemliss
Date: 2006-09-16 08:12:13 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Comparing diff of times.
Modified Paths:
--------------
trunk/bot.pl
trunk/config/configure.sample
trunk/includes/functions.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 14:38:57 UTC (rev 26)
+++ trunk/bot.pl 2006-09-16 15:12:13 UTC (rev 27)
@@ -6,6 +6,7 @@
# Libraries
use LWP::UserAgent;
+use Time::Local;
use includes::actions;
use includes::dump;
use includes::functions;
Modified: trunk/config/configure.sample
===================================================================
--- trunk/config/configure.sample 2006-09-16 14:38:57 UTC (rev 26)
+++ trunk/config/configure.sample 2006-09-16 15:12:13 UTC (rev 27)
@@ -5,9 +5,6 @@
use warnings;
use strict;
-# Libraries
-use Time::local;
-
# Available servers and details about them, the bot should work on
our %servers = (
"server" => "http://www.server.com",
@@ -57,7 +54,7 @@
"action" => "מחליף שיחות אנונימיות",
); # Summary/reason sent about the action
our %minimumTimes = (
- "action" => timegm( 0, 0, 0, 1, 1 - 1, 1970 - 1900 ),
-); # The minimum diff between the current time and the last edit time
+ "action" => timegm( 0, 0, 0, 1, 1 - 1, 0 + 70 ),
+); # The minimum diff between the current time and the last edit time (use: seconds, minutes, hours, day in month, month, year number; 1 January 0 00:00:00 means no minimum time, 2 January 0 00:00:00 means that the last edit should be at least one day old, etc.; edit only the first numbers, as the second ones are used for compatibility to the function)
return 1;
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 14:38:57 UTC (rev 26)
+++ trunk/includes/functions.pm 2006-09-16 15:12:13 UTC (rev 27)
@@ -58,7 +58,7 @@
$tempCounter = 0;
for ( my $i = 0; $i <= $#pagePossibleActions; $i++ ) {
my $wantedText = $configure::texts{ $pagePossibleActions[$i] };
- if ($$text =~ /$wantedText/) {
+ if ( $$text =~ /$wantedText/ ) {
$tempTextArray[$tempCounter++] = $pagePossibleActions[$i];
}
}
@@ -69,6 +69,26 @@
next;
}
+ # Check if the page last edit is after the minimal time
+ my @tempLastEditArray;
+ my $lastEdit = $page->timestamp;
+ $lastEdit =~ /([\d]+)-([\d]+)-([\d]+)T([\d]+):([\d]+):([\d]+)Z/;
+ $lastEdit = main::timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
+ my $betweenTime = time() - $lastEdit;
+ $tempCounter = 0;
+ for ( my $i = 0; $i <= $#pagePossibleActions; $i++ ) {
+ my $minimumTime = $configure::minimumTimes{ $pagePossibleActions[$i] };
+ if ( $betweenTime > $minimumTime ) {
+ $tempLastEditArray[$tempCounter++] = $pagePossibleActions[$i];
+ }
+ }
+
+ # Continue if there are no matching pages
+ @pagePossibleActions = @tempLastEditArray;
+ if ( $#pagePossibleActions < 0 ) {
+ next;
+ }
+
# All the previous checks were OK - add the page, using the first available action
$matchingPages[$counter++] = $title;
$matchingPages[$counter++] = $pagePossibleActions[0];
@@ -78,36 +98,6 @@
return @matchingPages;
}
-# Get page last edit
-# TODO: Move to getMatchingPages
-sub getPageLastEdit {
- # Get parameters
- my ( $xmlFile, $title ) = @_;
- utf8::decode( $title );
-
- # Get the pages
- my $pages = mwdump->new($xmlFile);
-
- # Can use only "first-letter", which is also the only currently known value,
- # but there could be more in the future
- if ( $pages->case ne "first-letter" ) {
- die "Unable to handle any case setting besides \"first-letter\".\n";
- }
-
- # Searching for the page, and getting its timestamp
- my $lastEditTimestamp = "0";
- while ( $lastEditTimestamp eq "0" && ( my $page = $pages->page ) ) {
- if ( $title eq $page->title ) {
- $lastEditTimestamp = $page->timestamp;
- }
- }
-
- # Check the time by RegExp
- $lastEditTimestamp =~ /([\d]+)-([\d]+)-([\d]+)T([\d]+):([\d]+):([\d]+)Z/;
-
- return timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
-}
-
# Filter actions list to the specified script server
sub filterActionsList {
# Get parameters
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 14:39:07
|
Revision: 26
http://svn.sourceforge.net/perlwikibot/?rev=26&view=rev
Author: rotemliss
Date: 2006-09-16 07:38:57 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Allowing page moves, the bot is now pretty much completed for very general uses - now it should be simplified, allow other uses, etc. The moves bug is still exist, but it was exist also in the old files.
Modified Paths:
--------------
trunk/bot.pl
trunk/includes/functions.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 14:07:18 UTC (rev 25)
+++ trunk/bot.pl 2006-09-16 14:38:57 UTC (rev 26)
@@ -40,11 +40,13 @@
my $action = $pages[$i + 1];
my $actionName = $configure::actions{ $action };
- # Execute the action (TODO: add move and delete)
+ # Execute the action
if ( $actionName eq "replace" ) {
functions::replaceInPage( $title, $action );
} elsif ( $actionName eq "refresh" ) {
functions::refreshPage( $title, $action );
+ } elsif ( $actionName eq "move" ) {
+ functions::movePage( $title, $action );
} elsif ( $actionName eq "delete" ) {
functions::deletePage( $title, $action );
}
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 14:07:18 UTC (rev 25)
+++ trunk/includes/functions.pm 2006-09-16 14:38:57 UTC (rev 26)
@@ -224,6 +224,23 @@
}
}
+# Move page
+sub movePage {
+ # Get parameters
+ my ( $oldTitle, $action ) = @_;
+
+ # Send page
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ my $oldTitles = $configure::titles{ $action };
+ my $newTitles = $configure::news{ $action };
+ my $newTitle = $oldTitle;
+ $newTitle =~ s/$oldTitles/$newTitles/; # FIXME: the frequent use of $1 or \1 does not work, therefore this feature is pretty useless; couldn't work around it so far
+ actions::movePage( $oldTitle, $newTitle, $configure::reasons{ $action } );
+ } else {
+ print "Moved page $oldTitle.\n";
+ }
+}
+
# Delete page
sub deletePage {
# Get parameters
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 14:07:29
|
Revision: 25
http://svn.sourceforge.net/perlwikibot/?rev=25&view=rev
Author: rotemliss
Date: 2006-09-16 07:07:18 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Allowing automated page deletions, and several fixes.
Modified Paths:
--------------
trunk/bot.pl
trunk/includes/actions.pm
trunk/includes/functions.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 13:28:27 UTC (rev 24)
+++ trunk/bot.pl 2006-09-16 14:07:18 UTC (rev 25)
@@ -37,12 +37,15 @@
for ( my $i = 0; $i <= $#pages; $i += 2 ) {
# Get title and action
my $title = $pages[$i];
- my $action = $configure::actions{ $pages[$i + 1] };
+ my $action = $pages[$i + 1];
+ my $actionName = $configure::actions{ $action };
# Execute the action (TODO: add move and delete)
- if ( $action eq "replace" ) {
- functions::replaceInPage( $title );
- } elsif ( $action eq "refresh" ) {
- functions::refreshPage( $title );
+ if ( $actionName eq "replace" ) {
+ functions::replaceInPage( $title, $action );
+ } elsif ( $actionName eq "refresh" ) {
+ functions::refreshPage( $title, $action );
+ } elsif ( $actionName eq "delete" ) {
+ functions::deletePage( $title, $action );
}
}
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-16 13:28:27 UTC (rev 24)
+++ trunk/includes/actions.pm 2006-09-16 14:07:18 UTC (rev 25)
@@ -17,7 +17,7 @@
# Edit page
sub editPage {
# Get parameters
- my ( $title, $text, $editSummary ) = @_;
+ my ( $title, $text, $summary ) = @_;
print "Editing page $title...\n";
@@ -51,7 +51,7 @@
wpStarttime => $startTime,
wpEdittime => $editTime,
wpEditToken => $editToken,
- wpSummary => $editSummary,
+ wpSummary => $summary,
wpTextbox1 => $text,
wpMinoredit => 1,
],
@@ -75,12 +75,12 @@
# Move page
sub movePage {
# Get parameters
- my ( $title, $newTitle, $reason ) = @_;
+ my ( $oldTitle, $newTitle, $reason ) = @_;
- print "Moving page $title to $newTitle...\n";
+ print "Moving page $oldTitle to $newTitle...\n";
# Get the delete page contents
- my $page = http::getPage( "Special:Movepage/$title" );
+ my $page = http::getPage( "Special:Movepage/$oldTitle" );
# Get edit token
$page =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
@@ -91,7 +91,7 @@
if ( $configure::sendPages == 1 ) {
http::postPage( "Special:Movepage", "submit",
[
- wpOldTitle => $title,
+ wpOldTitle => $oldTitle,
wpNewTitle => $newTitle,
wpReason => $reason,
wpEditToken => $editToken,
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 13:28:27 UTC (rev 24)
+++ trunk/includes/functions.pm 2006-09-16 14:07:18 UTC (rev 25)
@@ -169,14 +169,15 @@
# Replace regular expressions in the page
sub replaceInPage {
# Get parameters
- my ($title) = @_;
+ my ( $title, $action ) = @_;
+ # Send page
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
# Get the edited page contents
- my $editedPage = actions::getPageContents( $title );
+ my $page = actions::getPageContents( $title );
# Set initial edit summary
- my $editSummary = $configure::initialEditSummaries{ $ARGV[0] };
+ my $summary = $configure::initialEditSummaries{ $ARGV[0] };
# Groups array
my @executedActions = filterActionsList( @configure::executedActions );
@@ -190,20 +191,20 @@
for ( my $i = 0; $i <= $#actions; $i++ ) {
my $action = $actions[$i];
my $search = $configure::texts{ $action };
- if ( $editedPage =~ /$search/ ) {
+ if ( $page =~ /$search/ ) {
my $replace = $configure::news{ $action };
- $editedPage =~ s/$search/$replace/g;
+ $page =~ s/$search/$replace/g;
if ( $replaced == 1 ) {
- $editSummary = "$editSummary, ";
+ $summary = "$summary, ";
}
- $editSummary = $editSummary.$configure::reasons{ $action };
+ $summary = $summary.$configure::reasons{ $action };
$replaced = 1;
}
}
# Edit page only of replaced something
if ( $replaced == 1 ) {
- actions::editPage( $title, $editedPage, $editSummary );
+ actions::editPage( $title, $page, $summary );
}
} else {
print "Replaced in page $title.\n";
@@ -213,8 +214,9 @@
# Refresh page
sub refreshPage {
# Get parameters
- my ( $title ) = @_;
+ my ( $title, $action ) = @_;
+ # Send page
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
actions::refreshPage( $title );
} else {
@@ -225,10 +227,11 @@
# Delete page
sub deletePage {
# Get parameters
- my ( $title ) = @_;
+ my ( $title, $action ) = @_;
+ # Send page
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- actions::deletePage( $title );
+ actions::deletePage( $title, $configure::reasons{ $action } );
} else {
print "Deleted page $title.\n";
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 13:28:34
|
Revision: 24
http://svn.sourceforge.net/perlwikibot/?rev=24&view=rev
Author: rotemliss
Date: 2006-09-16 06:28:27 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Fixes, now OK.
Modified Paths:
--------------
trunk/includes/actions.pm
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-16 13:26:26 UTC (rev 23)
+++ trunk/includes/actions.pm 2006-09-16 13:28:27 UTC (rev 24)
@@ -28,18 +28,18 @@
}
# Get the edit page contents
- my $editPage = http::getPage( $title, "edit" );
+ my $page = http::getPage( $title, "edit" );
# Get the start time
- $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
+ $page =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
my $startTime = $1;
# Get the edit time
- $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpEdittime" \/>/;
+ $page =~ /<input type='hidden' value="([0-9]{14})" name="wpEdittime" \/>/;
my $editTime = $1;
# Get edit token
- $editPage =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
+ $page =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
my $editToken = $1;
# Send page
@@ -80,10 +80,10 @@
print "Moving page $title to $newTitle...\n";
# Get the delete page contents
- my $movePage = http::getPage( "Special:Movepage/$title" );
+ my $page = http::getPage( "Special:Movepage/$title" );
# Get edit token
- $editPage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
+ $page =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
my $editToken = $1;
# Send page
@@ -112,10 +112,10 @@
print "Deleting page $title...\n";
# Get the delete page contents
- my $deletePage = http::getPage( $title, "delete" );
+ my $page = http::getPage( $title, "delete" );
# Get edit token
- $editPage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
+ $page =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
my $editToken = $1;
# Send page
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 13:26:33
|
Revision: 23
http://svn.sourceforge.net/perlwikibot/?rev=23&view=rev
Author: rotemliss
Date: 2006-09-16 06:26:26 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Fixes, now broken.
Modified Paths:
--------------
trunk/includes/actions.pm
trunk/includes/functions.pm
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-16 13:06:38 UTC (rev 22)
+++ trunk/includes/actions.pm 2006-09-16 13:26:26 UTC (rev 23)
@@ -75,24 +75,25 @@
# Move page
sub movePage {
# Get parameters
- my ( $title, $newTitle, $moveReason ) = @_;
+ my ( $title, $newTitle, $reason ) = @_;
print "Moving page $title to $newTitle...\n";
# Get the delete page contents
my $movePage = http::getPage( "Special:Movepage/$title" );
- # Get the edit token
- if ($movePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
- my $editToken = $1;
+ # Get edit token
+ $editPage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
+ my $editToken = $1;
- # Send page
+ # Send page
+ if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
if ( $configure::sendPages == 1 ) {
http::postPage( "Special:Movepage", "submit",
[
wpOldTitle => $title,
wpNewTitle => $newTitle,
- wpReason => $moveReason,
+ wpReason => $reason,
wpEditToken => $editToken,
],
);
@@ -113,11 +114,12 @@
# Get the delete page contents
my $deletePage = http::getPage( $title, "delete" );
- # Get the edit token
- if ($deletePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
- my $editToken = $1;
+ # Get edit token
+ $editPage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/;
+ my $editToken = $1;
- # Send page
+ # Send page
+ if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
if ( $configure::sendPages == 1 ) {
http::postPage( $title, "delete",
[
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 13:06:38 UTC (rev 22)
+++ trunk/includes/functions.pm 2006-09-16 13:26:26 UTC (rev 23)
@@ -222,15 +222,15 @@
}
}
-# Refresh page
+# Delete page
sub deletePage {
# Get parameters
my ( $title ) = @_;
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- actions::deletePage( $title, getPageContents( $title ), "Refreshing page" );
+ actions::deletePage( $title );
} else {
- print "Refreshed page $title.\n";
+ print "Deleted page $title.\n";
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 13:06:45
|
Revision: 22
http://svn.sourceforge.net/perlwikibot/?rev=22&view=rev
Author: rotemliss
Date: 2006-09-16 06:06:38 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Adding the ability to specify a server for each action, and ignoring actions which are not executed on the specified server.
Modified Paths:
--------------
trunk/config/configure.sample
trunk/includes/functions.pm
Modified: trunk/config/configure.sample
===================================================================
--- trunk/config/configure.sample 2006-09-16 12:48:10 UTC (rev 21)
+++ trunk/config/configure.sample 2006-09-16 13:06:38 UTC (rev 22)
@@ -43,7 +43,7 @@
); # The global actions done
our %actionServers = (
"action" => "server",
-); # Which server should it be executed on? TODO - allow using multiple servers like that, currently ignored
+); # Which server should it be executed on?
our %titles = (
"action" => "^שיחת משתמש:[\\d]+\\.[\\d]+\\.[\\d]+\\.[\\d]+\$",
); # Restrict the actions to these titles
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 12:48:10 UTC (rev 21)
+++ trunk/includes/functions.pm 2006-09-16 13:06:38 UTC (rev 22)
@@ -15,8 +15,8 @@
die "Unable to handle any case setting besides \"first-letter\".\n";
}
- # Get the executed actions
- my @possibleActions = @configure::executedActions;
+ # Get the executed actions of the specified server
+ my @possibleActions = filterActionsList( @configure::executedActions );
# An array contains all the pages
my @matchingPages;
@@ -108,6 +108,23 @@
return timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
}
+# Filter actions list to the specified script server
+sub filterActionsList {
+ # Get parameters
+ my @originalList = @_;
+
+ # Go over the actions list, and filter the actions which are not in the current server
+ my @newList;
+ my $counter = 0;
+ for ( my $i = 0; $i <= $#originalList; $i++ ) {
+ if ( $configure::actionServers{ $originalList[$i] } eq $ARGV[0] ) {
+ $newList[$counter++] = $originalList[$i];
+ }
+ }
+
+ return @newList;
+}
+
# Log in
sub login {
# Is sysop finally required?
@@ -162,7 +179,9 @@
my $editSummary = $configure::initialEditSummaries{ $ARGV[0] };
# Groups array
- my @actions = ( @configure::executedActions, @configure::bywayActions );
+ my @executedActions = filterActionsList( @configure::executedActions );
+ my @bywayActions = filterActionsList( @configure::bywayActions );
+ my @actions = ( @executedActions, @bywayActions );
# Replaced something at all? Flag to check
my $replaced = 0;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 12:48:19
|
Revision: 21
http://svn.sourceforge.net/perlwikibot/?rev=21&view=rev
Author: rotemliss
Date: 2006-09-16 05:48:10 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Several fixes.
Modified Paths:
--------------
trunk/bot.pl
trunk/includes/actions.pm
trunk/includes/functions.pm
trunk/includes/http.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 12:30:18 UTC (rev 20)
+++ trunk/bot.pl 2006-09-16 12:48:10 UTC (rev 21)
@@ -5,13 +5,14 @@
use strict;
# Libraries
+use LWP::UserAgent;
+use includes::actions;
+use includes::dump;
use includes::functions;
+use includes::http;
use config::configure;
use config::runtime;
-# Counters
-my ( $i, $j, $k, $l, $m );
-
# Do nothing if no server specified
if ( !$ARGV[0] ) {
die "No server name set - please set server name!\n";
@@ -33,7 +34,7 @@
my @pages = functions::getMatchingPages();
# Execute actions of all the pages
-for ( $i = 0; $i <= $#pages; $i += 2 ) {
+for ( my $i = 0; $i <= $#pages; $i += 2 ) {
# Get title and action
my $title = $pages[$i];
my $action = $configure::actions{ $pages[$i + 1] };
Modified: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm 2006-09-16 12:30:18 UTC (rev 20)
+++ trunk/includes/actions.pm 2006-09-16 12:48:10 UTC (rev 21)
@@ -5,55 +5,6 @@
use warnings;
use strict;
-# Libraries
-use includes::http;
-use config::configure;
-use config::runtime;
-
-# Counters
-my ( $i, $j, $k, $l, $m );
-
-# Log in
-sub login {
- # Is sysop finally required?
- my $sysop = 0;
-
- # Go through the array, and check if sysop permission is needed.
- for ( $i = 0; $i <= $#configure::executedActions; $i++ ) {
- my $action = $configure::actions{ $configure::executedActions[$i] };
- if ( $action eq "replace" ) {
- # Continue
- } elsif ($action eq "refresh") {
- # Continue
- } elsif ($action eq "delete") {
- $sysop = 1;
- } elsif ($action eq "move") {
- # Continue
- }
- }
-
- # Get user name and password
- my ( $username, $password );
- if ( $sysop == 1 ) {
- $username = $configure::sysopUserNames{ $ARGV[0] };
- $password = $configure::sysopPasswords{ $ARGV[0] };
- } else {
- $username = $configure::userNames{ $ARGV[0] };
- $password = $configure::passwords{ $ARGV[0] };
- }
-
- # Log in
- print "Logging in...\n";
- http::postPage( "Special:Userlogin", "submitlogin",
- [
- wpName => $username,
- wpPassword => $password,
- wpRemember => 0,
- ],
- , "type=login" );
- print "\tDone!\n";
-}
-
# Get the contents of a page
sub getPageContents {
# Get parameters
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 12:30:18 UTC (rev 20)
+++ trunk/includes/functions.pm 2006-09-16 12:48:10 UTC (rev 21)
@@ -5,15 +5,6 @@
use warnings;
use strict;
-# Libraries
-use includes::actions;
-use includes::dump;
-use config::configure;
-use config::runtime;
-
-# Counters
-my ( $i, $j, $k, $l, $m );
-
# Get all the pages matching specific restrictions
sub getMatchingPages {
# Get the pages
@@ -47,7 +38,7 @@
my $title = $page->title;
utf8::encode( $title );
$tempCounter = 0;
- for ( $i = 0; $i <= $#pagePossibleActions; $i++ ) {
+ for ( my $i = 0; $i <= $#pagePossibleActions; $i++ ) {
my $wantedTitle = $configure::titles{ $pagePossibleActions[$i] };
if ( $title =~ /$wantedTitle/ ) {
$tempTitleArray[$tempCounter++] = $pagePossibleActions[$i];
@@ -65,7 +56,7 @@
my $text = $page->text;
utf8::encode( $$text );
$tempCounter = 0;
- for ( $i = 0; $i <= $#pagePossibleActions; $i++ ) {
+ for ( my $i = 0; $i <= $#pagePossibleActions; $i++ ) {
my $wantedText = $configure::texts{ $pagePossibleActions[$i] };
if ($$text =~ /$wantedText/) {
$tempTextArray[$tempCounter++] = $pagePossibleActions[$i];
@@ -117,6 +108,47 @@
return timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
}
+# Log in
+sub login {
+ # Is sysop finally required?
+ my $sysop = 0;
+
+ # Go through the array, and check if sysop permission is needed.
+ for ( my $i = 0; $i <= $#configure::executedActions; $i++ ) {
+ my $action = $configure::actions{ $configure::executedActions[$i] };
+ if ( $action eq "replace" ) {
+ # Continue
+ } elsif ($action eq "refresh") {
+ # Continue
+ } elsif ($action eq "delete") {
+ $sysop = 1;
+ } elsif ($action eq "move") {
+ # Continue
+ }
+ }
+
+ # Get user name and password
+ my ( $username, $password );
+ if ( $sysop == 1 ) {
+ $username = $configure::sysopUserNames{ $ARGV[0] };
+ $password = $configure::sysopPasswords{ $ARGV[0] };
+ } else {
+ $username = $configure::userNames{ $ARGV[0] };
+ $password = $configure::passwords{ $ARGV[0] };
+ }
+
+ # Log in
+ print "Logging in...\n";
+ http::postPage( "Special:Userlogin", "submitlogin",
+ [
+ wpName => $username,
+ wpPassword => $password,
+ wpRemember => 0,
+ ],
+ , "type=login" );
+ print "\tDone!\n";
+}
+
# Replace regular expressions in the page
sub replaceInPage {
# Get parameters
@@ -124,7 +156,7 @@
if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
# Get the edited page contents
- my $editedPage = getPageContents( $title );
+ my $editedPage = actions::getPageContents( $title );
# Set initial edit summary
my $editSummary = $configure::initialEditSummaries{ $ARGV[0] };
@@ -136,7 +168,7 @@
my $replaced = 0;
# Replace regular expressions
- for ( $i = 0; $i <= $#actions; $i++ ) {
+ for ( my $i = 0; $i <= $#actions; $i++ ) {
my $action = $actions[$i];
my $search = $configure::texts{ $action };
if ( $editedPage =~ /$search/ ) {
Modified: trunk/includes/http.pm
===================================================================
--- trunk/includes/http.pm 2006-09-16 12:30:18 UTC (rev 20)
+++ trunk/includes/http.pm 2006-09-16 12:48:10 UTC (rev 21)
@@ -5,10 +5,6 @@
use warnings;
use strict;
-# Libraries
-use LWP;
-use config::configure;
-
# The browser we use to surf; enable cookies, and use a specific user agent
my $browser = LWP::UserAgent->new();
$browser->cookie_jar( {} );
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 12:30:29
|
Revision: 20
http://svn.sourceforge.net/perlwikibot/?rev=20&view=rev
Author: rotemliss
Date: 2006-09-16 05:30:18 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Splitting the actual actions on the server to another file, adding the refreshing, will add all the other features soon; may be broken for now.
Modified Paths:
--------------
trunk/bot.pl
trunk/includes/functions.pm
trunk/includes/http.pm
Added Paths:
-----------
trunk/includes/actions.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 11:45:57 UTC (rev 19)
+++ trunk/bot.pl 2006-09-16 12:30:18 UTC (rev 20)
@@ -38,8 +38,10 @@
my $title = $pages[$i];
my $action = $configure::actions{ $pages[$i + 1] };
- # Execute the action (TODO: add the actions which were supported in the previous bot)
+ # Execute the action (TODO: add move and delete)
if ( $action eq "replace" ) {
functions::replaceInPage( $title );
+ } elsif ( $action eq "refresh" ) {
+ functions::refreshPage( $title );
}
}
Added: trunk/includes/actions.pm
===================================================================
--- trunk/includes/actions.pm (rev 0)
+++ trunk/includes/actions.pm 2006-09-16 12:30:18 UTC (rev 20)
@@ -0,0 +1,184 @@
+# Package name
+package actions;
+
+# Code style
+use warnings;
+use strict;
+
+# Libraries
+use includes::http;
+use config::configure;
+use config::runtime;
+
+# Counters
+my ( $i, $j, $k, $l, $m );
+
+# Log in
+sub login {
+ # Is sysop finally required?
+ my $sysop = 0;
+
+ # Go through the array, and check if sysop permission is needed.
+ for ( $i = 0; $i <= $#configure::executedActions; $i++ ) {
+ my $action = $configure::actions{ $configure::executedActions[$i] };
+ if ( $action eq "replace" ) {
+ # Continue
+ } elsif ($action eq "refresh") {
+ # Continue
+ } elsif ($action eq "delete") {
+ $sysop = 1;
+ } elsif ($action eq "move") {
+ # Continue
+ }
+ }
+
+ # Get user name and password
+ my ( $username, $password );
+ if ( $sysop == 1 ) {
+ $username = $configure::sysopUserNames{ $ARGV[0] };
+ $password = $configure::sysopPasswords{ $ARGV[0] };
+ } else {
+ $username = $configure::userNames{ $ARGV[0] };
+ $password = $configure::passwords{ $ARGV[0] };
+ }
+
+ # Log in
+ print "Logging in...\n";
+ http::postPage( "Special:Userlogin", "submitlogin",
+ [
+ wpName => $username,
+ wpPassword => $password,
+ wpRemember => 0,
+ ],
+ , "type=login" );
+ print "\tDone!\n";
+}
+
+# Get the contents of a page
+sub getPageContents {
+ # Get parameters
+ my ( $title ) = @_;
+
+ # Get page contents
+ return http::getPage( $title, "raw" );
+}
+
+# Edit page
+sub editPage {
+ # Get parameters
+ my ( $title, $text, $editSummary ) = @_;
+
+ print "Editing page $title...\n";
+
+ # Add prefix if necessary
+ if ( $configure::sendPages == 2 ) {
+ my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
+ $title = $serverPrefix.$title;
+ }
+
+ # Get the edit page contents
+ my $editPage = http::getPage( $title, "edit" );
+
+ # Get the start time
+ $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
+ my $startTime = $1;
+
+ # Get the edit time
+ $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpEdittime" \/>/;
+ my $editTime = $1;
+
+ # Get edit token
+ $editPage =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
+ my $editToken = $1;
+
+ # Send page
+ if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ http::postPage( $title, "submit",
+ [
+ wpSection => "",
+ wpStarttime => $startTime,
+ wpEdittime => $editTime,
+ wpEditToken => $editToken,
+ wpSummary => $editSummary,
+ wpTextbox1 => $text,
+ wpMinoredit => 1,
+ ],
+ );
+ }
+ print "\tDone!\n";
+ } else {
+ print "Error!\nThis may be a protected page you don't have permission to edit, or it has deleted since the dump file you use created.\n";
+ }
+}
+
+# Refresh page
+sub refreshPage {
+ # Get parameters
+ my ( $title ) = @_;
+
+ # Null edit
+ editPage( $title, getPageContents( $title ), "Refreshing page" );
+}
+
+# Move page
+sub movePage {
+ # Get parameters
+ my ( $title, $newTitle, $moveReason ) = @_;
+
+ print "Moving page $title to $newTitle...\n";
+
+ # Get the delete page contents
+ my $movePage = http::getPage( "Special:Movepage/$title" );
+
+ # Get the edit token
+ if ($movePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
+ my $editToken = $1;
+
+ # Send page
+ if ( $configure::sendPages == 1 ) {
+ http::postPage( "Special:Movepage", "submit",
+ [
+ wpOldTitle => $title,
+ wpNewTitle => $newTitle,
+ wpReason => $moveReason,
+ wpEditToken => $editToken,
+ ],
+ );
+ }
+ print "\tDone!\n";
+ } else {
+ print "Error!\nYou may not have the permission to move pages, or there may already a page (which is not just a redirection) under the new title.\n";
+ }
+}
+
+# Delete page
+sub deletePage {
+ # Get parameters
+ my ( $title, $reason ) = @_;
+
+ print "Deleting page $title...\n";
+
+ # Get the delete page contents
+ my $deletePage = http::getPage( $title, "delete" );
+
+ # Get the edit token
+ if ($deletePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
+ my $editToken = $1;
+
+ # Send page
+ if ( $configure::sendPages == 1 ) {
+ http::postPage( $title, "delete",
+ [
+ wpReason => $reason,
+ wpEditToken => $editToken,
+ ],
+ );
+ }
+ print "\tDone!\n";
+ } else {
+ print "Error!\nYou may not have the permission to delete pages, or the page is already deleted.\n";
+ }
+}
+
+return 1;
Property changes on: trunk/includes/actions.pm
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 11:45:57 UTC (rev 19)
+++ trunk/includes/functions.pm 2006-09-16 12:30:18 UTC (rev 20)
@@ -6,8 +6,8 @@
use strict;
# Libraries
+use includes::actions;
use includes::dump;
-use includes::http;
use config::configure;
use config::runtime;
@@ -117,103 +117,6 @@
return timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
}
-# Log in
-sub login {
- # Is sysop finally required?
- my $sysop = 0;
-
- # Go through the array, and check if sysop permission is needed.
- for ( $i = 0; $i <= $#configure::executedActions; $i++ ) {
- my $action = $configure::actions{ $configure::executedActions[$i] };
- if ( $action eq "replace" ) {
- # Continue
- } elsif ($action eq "refresh") {
- # Continue
- } elsif ($action eq "delete") {
- $sysop = 1;
- } elsif ($action eq "move") {
- # Continue
- }
- }
-
- # Get user name and password
- my ( $username, $password );
- if ( $sysop == 1 ) {
- $username = $configure::sysopUserNames{ $ARGV[0] };
- $password = $configure::sysopPasswords{ $ARGV[0] };
- } else {
- $username = $configure::userNames{ $ARGV[0] };
- $password = $configure::passwords{ $ARGV[0] };
- }
-
- # Log in
- print "Logging in...\n";
- http::postPage( "Special:Userlogin", "submitlogin",
- [
- wpName => $username,
- wpPassword => $password,
- wpRemember => 0,
- ],
- , "type=login" );
- print "\tDone!\n";
-}
-
-# Get the contents of a page
-sub getPageContents {
- # Get parameters
- my ( $title ) = @_;
-
- return http::getPage( $title, "raw" );
-}
-
-# Edit page
-sub editPage {
- # Get parameters
- my ( $title, $text, $editSummary ) = @_;
-
- print "Editing page $title...\n";
-
- if ( $configure::sendPages == 2 ) {
- my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
- $title = $serverPrefix.$title;
- }
-
- # Get the edit page contents
- my $editPage = http::getPage( $title, "edit" );
-
- # Get the start time
- $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
- my $startTime = $1;
-
- # Get the edit time
- $editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpEdittime" \/>/;
- my $editTime = $1;
-
- # Get edit token
- $editPage =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
- my $editToken = $1;
-
- # Send page
- if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
- if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
- http::postPage( $title, "submit",
- [
- wpSection => "",
- wpStarttime => $startTime,
- wpEdittime => $editTime,
- wpEditToken => $editToken,
- wpSummary => $editSummary,
- wpTextbox1 => $text,
- wpMinoredit => 1,
- ],
- );
- }
- print "\tDone!\n";
- } else {
- print "Error!\nThis may be a protected page you don't have permission to edit, or it has deleted since the dump file you use created.\n";
- }
-}
-
# Replace regular expressions in the page
sub replaceInPage {
# Get parameters
@@ -249,7 +152,7 @@
# Edit page only of replaced something
if ( $replaced == 1 ) {
- editPage( $title, $editedPage, $editSummary );
+ actions::editPage( $title, $editedPage, $editSummary );
}
} else {
print "Replaced in page $title.\n";
@@ -259,70 +162,24 @@
# Refresh page
sub refreshPage {
# Get parameters
- my ( $server, $title ) = @_;
+ my ( $title ) = @_;
- editPage( $title, getPageContents( $title ), "Refreshing page" );
-}
-
-# Move page
-sub movePage {
- # Get parameters
- my ( $server, $title, $newTitle, $moveReason ) = @_;
-
- print "Moving page $title to $newTitle...\n";
-
- # Get the delete page contents
- my $movePage = http::getPage( "Special:Movepage/$title" );
-
- # Get the edit token
- if ($movePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
- my $editToken = $1;
-
- # Send page
- if ( $configure::sendPages == 1 ) {
- http::postPage( "Special:Movepage", "submit",
- [
- wpOldTitle => $title,
- wpNewTitle => $newTitle,
- wpReason => $moveReason,
- wpEditToken => $editToken,
- ],
- );
- }
- print "\tDone!\n";
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ actions::refreshPage( $title );
+ } else {
+ print "Refreshed page $title.\n";
}
- else
- {
- print "Error!\nYou may not have the permission to move pages, or there may already a page (which is not just a redirection) under the new title.\n";
- }
}
-# Delete page
+# Refresh page
sub deletePage {
# Get parameters
- my ( $server, $title, $reason ) = @_;
+ my ( $title ) = @_;
- print "Deleting page $title...\n";
-
- # Get the delete page contents
- my $deletePage = http::getPage( $title, "delete" );
-
- # Get the edit token
- if ($deletePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
- my $editToken = $1;
-
- # Send page
- if ( $configure::sendPages == 1 ) {
- http::postPage( $title, "delete",
- [
- wpReason => $reason,
- wpEditToken => $editToken,
- ],
- );
- }
- print "\tDone!\n";
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ actions::deletePage( $title, getPageContents( $title ), "Refreshing page" );
} else {
- print "Error!\nYou may not have the permission to delete pages, or the page is already deleted.\n";
+ print "Refreshed page $title.\n";
}
}
Modified: trunk/includes/http.pm
===================================================================
--- trunk/includes/http.pm 2006-09-16 11:45:57 UTC (rev 19)
+++ trunk/includes/http.pm 2006-09-16 12:30:18 UTC (rev 20)
@@ -55,7 +55,7 @@
# Post a wiki page, try again and again if error
sub postPage {
# Get parameters
- my ($title, $action, $post, $get) = @_;
+ my ( $title, $action, $post, $get ) = @_;
my $url = buildPageURL( $title, $action, $get );
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <rot...@us...> - 2006-09-16 11:46:09
|
Revision: 19
http://svn.sourceforge.net/perlwikibot/?rev=19&view=rev
Author: rotemliss
Date: 2006-09-16 04:45:57 -0700 (Sat, 16 Sep 2006)
Log Message:
-----------
Reformatting the code: whitespaces, code style, return statements, etc.
Modified Paths:
--------------
trunk/bot.pl
trunk/config/configure.sample
trunk/config/runtime.sample
trunk/includes/functions.pm
trunk/includes/http.pm
Modified: trunk/bot.pl
===================================================================
--- trunk/bot.pl 2006-09-16 11:06:13 UTC (rev 18)
+++ trunk/bot.pl 2006-09-16 11:45:57 UTC (rev 19)
@@ -10,27 +10,22 @@
use config::runtime;
# Counters
-my ($i, $j, $k, $l, $m);
+my ( $i, $j, $k, $l, $m );
# Do nothing if no server specified
-if (!$ARGV[0])
-{
- die("No server name set - please set server name!\n");
+if ( !$ARGV[0] ) {
+ die "No server name set - please set server name!\n";
}
# Show notes about the sending pages configuration
-if ($configure::sendPages == 0)
-{
+if ( $configure::sendPages == 0 ) {
print "Note: There will be no changes in the real server. Please set sendPages to 1 to do them.\n";
-}
-elsif ($configure::sendPages == 2)
-{
+} elsif ( $configure::sendPages == 2 ) {
print "Note: A prefix will be used when editing pages. Please set sendPages to 1 to disable that.\n";
}
# Log in to the server only if you have to do some stuff in the server
-if ($configure::sendPages == 1 || $configure::sendPages == 2)
-{
+if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
functions::login();
}
@@ -38,15 +33,13 @@
my @pages = functions::getMatchingPages();
# Execute actions of all the pages
-for ($i = 0; ($i <= $#pages); $i += 2)
-{
+for ( $i = 0; $i <= $#pages; $i += 2 ) {
# Get title and action
my $title = $pages[$i];
- my $action = $configure::actions{$pages[$i + 1]};
-
+ my $action = $configure::actions{ $pages[$i + 1] };
+
# Execute the action (TODO: add the actions which were supported in the previous bot)
- if ($action eq "replace")
- {
- functions::replaceInPage($title);
+ if ( $action eq "replace" ) {
+ functions::replaceInPage( $title );
}
}
Modified: trunk/config/configure.sample
===================================================================
--- trunk/config/configure.sample 2006-09-16 11:06:13 UTC (rev 18)
+++ trunk/config/configure.sample 2006-09-16 11:45:57 UTC (rev 19)
@@ -8,9 +8,6 @@
# Libraries
use Time::local;
-# Counters
-my ($i, $j, $k, $l, $m);
-
# Available servers and details about them, the bot should work on
our %servers = (
"server" => "http://www.server.com",
@@ -34,7 +31,7 @@
"server" => "site.xml",
); # The XML file of the database
our %initialEditSummaries = (
- "server" => "[[Project:Replace bot|Replace bot]] – "
+ "server" => "[[Project:Replace bot|Replace bot]] – ",
); # Initial edit summary, when several replaces are done and several edit summaries are integrated, using the replace script
our %serverPrefixes = (
"server" => "User:My-bot/Tests/",
@@ -42,26 +39,25 @@
# Avilable actions and details about them
our %actions = (
- "action" => "move"
+ "action" => "move",
); # The global actions done
our %actionServers = (
- "action" => "server"
+ "action" => "server",
); # Which server should it be executed on? TODO - allow using multiple servers like that, currently ignored
our %titles = (
- "action" => "^שיחת משתמש:[\\d]+\\.[\\d]+\\.[\\d]+\\.[\\d]+\$"
+ "action" => "^שיחת משתמש:[\\d]+\\.[\\d]+\\.[\\d]+\\.[\\d]+\$",
); # Restrict the actions to these titles
our %texts = (
- "action" => "^אנונימי\$"
+ "action" => "^אנונימי\$",
); # Restrict the actions to the pages
our %news = (
- "action" => "שיחת משתמש:אנונימי"
+ "action" => "שיחת משתמש:אנונימי",
); # New title (move) or text (replace), leave blank otherwise
our %reasons = (
- "action" => "מחליף שיחות אנונימיות"
+ "action" => "מחליף שיחות אנונימיות",
); # Summary/reason sent about the action
our %minimumTimes = (
- "action" => timegm(0, 0, 0, 1, 1 - 1, 1970 - 1900)
+ "action" => timegm( 0, 0, 0, 1, 1 - 1, 1970 - 1900 ),
); # The minimum diff between the current time and the last edit time
-# Return a true value
-1;
+return 1;
Modified: trunk/config/runtime.sample
===================================================================
--- trunk/config/runtime.sample 2006-09-16 11:06:13 UTC (rev 18)
+++ trunk/config/runtime.sample 2006-09-16 11:45:57 UTC (rev 19)
@@ -5,17 +5,14 @@
use warnings;
use strict;
-# Counters
-my ($i, $j, $k, $l, $m);
-
# Actions to execute
our @executedActions = (
- "action"
+ "action",
);
# Actions to execute by the way, if you are already editing the page (replace only)
our @bywayActions = (
- "actionB"
+ "actionB",
);
# This variable enables and disables the actual sends to the server.
@@ -27,5 +24,4 @@
# advanced tests, before you use 1 for the real changes.
our $sendPages = 0;
-# Return a true value
-1;
+return 1;
Modified: trunk/includes/functions.pm
===================================================================
--- trunk/includes/functions.pm 2006-09-16 11:06:13 UTC (rev 18)
+++ trunk/includes/functions.pm 2006-09-16 11:45:57 UTC (rev 19)
@@ -12,221 +12,191 @@
use config::runtime;
# Counters
-my ($i, $j, $k, $l, $m);
+my ( $i, $j, $k, $l, $m );
# Get all the pages matching specific restrictions
-sub getMatchingPages
-{
+sub getMatchingPages {
# Get the pages
- my $pages = mwdump->new("dumps/".$configure::xmlFiles{$ARGV[0]});
-
+ my $pages = mwdump->new( "dumps/".$configure::xmlFiles{ $ARGV[0] } );
+
# Can use only "first-letter", TODO: enable also "case-sensitive" for Wiktionary
- if ($pages->case ne "first-letter")
- {
+ if ( $pages->case ne "first-letter" ) {
die "Unable to handle any case setting besides \"first-letter\".\n";
}
-
+
# Get the executed actions
my @possibleActions = @configure::executedActions;
-
+
# An array contains all the pages
my @matchingPages;
-
+
# Reset counter
my $counter = 0;
-
+
# Iterate over all the pages
print "Checking all the pages...\n";
- while (my $page = $pages->page)
- {
+ while ( my $page = $pages->page ) {
# Temporary array counter
my $tempCounter;
-
+
# Get the page possible actions
my @pagePossibleActions = @possibleActions;
-
+
# Check if the page title matches to one of the titles
my @tempTitleArray;
my $title = $page->title;
- utf8::encode($title);
+ utf8::encode( $title );
$tempCounter = 0;
- for ($i = 0; ($i <= $#pagePossibleActions); $i++)
- {
- my $wantedTitle = $configure::titles{$pagePossibleActions[$i]};
- if ($title =~ /$wantedTitle/)
- {
+ for ( $i = 0; $i <= $#pagePossibleActions; $i++ ) {
+ my $wantedTitle = $configure::titles{ $pagePossibleActions[$i] };
+ if ( $title =~ /$wantedTitle/ ) {
$tempTitleArray[$tempCounter++] = $pagePossibleActions[$i];
}
}
-
+
# Continue if there are no matching pages
@pagePossibleActions = @tempTitleArray;
- if ($#pagePossibleActions < 0)
- {
+ if ( $#pagePossibleActions < 0 ) {
next;
}
-
+
# Check if the page text matches to one of the texts
my @tempTextArray;
my $text = $page->text;
- utf8::encode($$text);
+ utf8::encode( $$text );
$tempCounter = 0;
- for ($i = 0; ($i <= $#pagePossibleActions); $i++)
- {
- my $wantedText = $configure::texts{$pagePossibleActions[$i]};
- if ($$text =~ /$wantedText/)
- {
+ for ( $i = 0; $i <= $#pagePossibleActions; $i++ ) {
+ my $wantedText = $configure::texts{ $pagePossibleActions[$i] };
+ if ($$text =~ /$wantedText/) {
$tempTextArray[$tempCounter++] = $pagePossibleActions[$i];
}
}
-
+
# Continue if there are no matching pages
@pagePossibleActions = @tempTextArray;
- if ($#pagePossibleActions < 0)
- {
+ if ( $#pagePossibleActions < 0 ) {
next;
}
-
+
# All the previous checks were OK - add the page, using the first available action
$matchingPages[$counter++] = $title;
$matchingPages[$counter++] = $pagePossibleActions[0];
}
print "\tDone!\n";
-
- @matchingPages; # Return
+
+ return @matchingPages;
}
# Get page last edit
# TODO: Move to getMatchingPages
-sub getPageLastEdit
-{
+sub getPageLastEdit {
# Get parameters
- my ($xmlFile, $title) = @_;
- utf8::decode($title);
-
+ my ( $xmlFile, $title ) = @_;
+ utf8::decode( $title );
+
# Get the pages
- my $pages = Parse::MediaWikiDump::Pages->new($xmlFile);
-
+ my $pages = mwdump->new($xmlFile);
+
# Can use only "first-letter", which is also the only currently known value,
# but there could be more in the future
- if ($pages->case ne "first-letter")
- {
+ if ( $pages->case ne "first-letter" ) {
die "Unable to handle any case setting besides \"first-letter\".\n";
}
-
+
# Searching for the page, and getting its timestamp
my $lastEditTimestamp = "0";
- while (($lastEditTimestamp eq "0") && (my $page = $pages->page))
- {
- if ($title eq $page->title)
- {
+ while ( $lastEditTimestamp eq "0" && ( my $page = $pages->page ) ) {
+ if ( $title eq $page->title ) {
$lastEditTimestamp = $page->timestamp;
}
}
-
+
# Check the time by RegExp
$lastEditTimestamp =~ /([\d]+)-([\d]+)-([\d]+)T([\d]+):([\d]+):([\d]+)Z/;
-
- timegm($6, $5, $4, $3, $2 - 1, $1 - 1900); # Return
+
+ return timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
}
# Log in
-sub login
-{
+sub login {
# Is sysop finally required?
my $sysop = 0;
-
+
# Go through the array, and check if sysop permission is needed.
- for ($i = 0; ($i <= $#configure::executedActions); $i++)
- {
- my $action = $configure::actions{$configure::executedActions[$i]};
- if ($action eq "replace")
- {
+ for ( $i = 0; $i <= $#configure::executedActions; $i++ ) {
+ my $action = $configure::actions{ $configure::executedActions[$i] };
+ if ( $action eq "replace" ) {
# Continue
- }
- elsif ($action eq "refresh")
- {
+ } elsif ($action eq "refresh") {
# Continue
- }
- elsif ($action eq "delete")
- {
+ } elsif ($action eq "delete") {
$sysop = 1;
- }
- elsif ($action eq "move")
- {
+ } elsif ($action eq "move") {
# Continue
}
}
-
+
# Get user name and password
- my ($username, $password);
- if ($sysop == 1)
- {
- $username = $configure::sysopUserNames{$ARGV[0]};
- $password = $configure::sysopPasswords{$ARGV[0]};
+ my ( $username, $password );
+ if ( $sysop == 1 ) {
+ $username = $configure::sysopUserNames{ $ARGV[0] };
+ $password = $configure::sysopPasswords{ $ARGV[0] };
+ } else {
+ $username = $configure::userNames{ $ARGV[0] };
+ $password = $configure::passwords{ $ARGV[0] };
}
- else
- {
- $username = $configure::userNames{$ARGV[0]};
- $password = $configure::passwords{$ARGV[0]};
- }
-
+
# Log in
print "Logging in...\n";
- http::postPage("Special:Userlogin", "submitlogin",
+ http::postPage( "Special:Userlogin", "submitlogin",
[
wpName => $username,
wpPassword => $password,
- wpRemember => 0
+ wpRemember => 0,
],
- , "type=login");
+ , "type=login" );
print "\tDone!\n";
}
# Get the contents of a page
-sub getPageContents
-{
+sub getPageContents {
# Get parameters
- my ($title) = @_;
-
- http::getPage($title, "raw"); # Return
+ my ( $title ) = @_;
+
+ return http::getPage( $title, "raw" );
}
# Edit page
-sub editPage
-{
+sub editPage {
# Get parameters
- my ($title, $text, $editSummary) = @_;
-
+ my ( $title, $text, $editSummary ) = @_;
+
print "Editing page $title...\n";
-
- if ($configure::sendPages == 2)
- {
- my $serverPrefix = $configure::serverPrefixes{$ARGV[0]};
+
+ if ( $configure::sendPages == 2 ) {
+ my $serverPrefix = $configure::serverPrefixes{ $ARGV[0] };
$title = $serverPrefix.$title;
}
-
+
# Get the edit page contents
- my $editPage = http::getPage($title, "edit");
-
+ my $editPage = http::getPage( $title, "edit" );
+
# Get the start time
$editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpStarttime" \/>/;
my $startTime = $1;
-
+
# Get the edit time
$editPage =~ /<input type='hidden' value="([0-9]{14})" name="wpEdittime" \/>/;
my $editTime = $1;
-
+
# Get edit token
$editPage =~ /<input type='hidden' value="([0-9a-f]{32})" name="wpEditToken" \/>/;
my $editToken = $1;
-
+
# Send page
- if (defined($editToken) && $editToken =~ /[0-9a-f]{32}/)
- {
- if ($configure::sendPages == 1 || $configure::sendPages == 2)
- {
- http::postPage($title, "submit",
+ if ( defined( $editToken ) && $editToken =~ /[0-9a-f]{32}/ ) {
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
+ http::postPage( $title, "submit",
[
wpSection => "",
wpStarttime => $startTime,
@@ -234,102 +204,88 @@
wpEditToken => $editToken,
wpSummary => $editSummary,
wpTextbox1 => $text,
- wpMinoredit => 1
+ wpMinoredit => 1,
],
);
}
print "\tDone!\n";
- }
- else
- {
+ } else {
print "Error!\nThis may be a protected page you don't have permission to edit, or it has deleted since the dump file you use created.\n";
}
}
# Replace regular expressions in the page
-sub replaceInPage
-{
+sub replaceInPage {
# Get parameters
my ($title) = @_;
-
- if ($configure::sendPages == 1 || $configure::sendPages == 2)
- {
+
+ if ( $configure::sendPages == 1 || $configure::sendPages == 2 ) {
# Get the edited page contents
- my $editedPage = getPageContents($title);
-
+ my $editedPage = getPageContents( $title );
+
# Set initial edit summary
- my $editSummary = $configure::initialEditSummaries{$ARGV[0]};
-
+ my $editSummary = $configure::initialEditSummaries{ $ARGV[0] };
+
# Groups array
- my @actions = (@configure::executedActions, @configure::bywayActions);
-
+ my @actions = ( @configure::executedActions, @configure::bywayActions );
+
# Replaced something at all? Flag to check
my $replaced = 0;
-
+
# Replace regular expressions
- for ($i = 0; ($i <= $#actions); $i++)
- {
+ for ( $i = 0; $i <= $#actions; $i++ ) {
my $action = $actions[$i];
- my $search = $configure::texts{$action};
- if ($editedPage =~ /$search/)
- {
- my $replace = $configure::news{$action};
+ my $search = $configure::texts{ $action };
+ if ( $editedPage =~ /$search/ ) {
+ my $replace = $configure::news{ $action };
$editedPage =~ s/$search/$replace/g;
- if ($replaced == 1)
- {
- $editSummary = $editSummary.", ";
+ if ( $replaced == 1 ) {
+ $editSummary = "$editSummary, ";
}
- $editSummary = $editSummary.$configure::reasons{$action};
+ $editSummary = $editSummary.$configure::reasons{ $action };
$replaced = 1;
}
}
-
+
# Edit page only of replaced something
- if ($replaced == 1)
- {
- editPage($title, $editedPage, $editSummary);
+ if ( $replaced == 1 ) {
+ editPage( $title, $editedPage, $editSummary );
}
- }
- else
- {
+ } else {
print "Replaced in page $title.\n";
}
}
# Refresh page
-sub refreshPage
-{
+sub refreshPage {
# Get parameters
- my ($server, $title) = @_;
-
- editPage($title, getPageContents($title), "Refreshing page");
+ my ( $server, $title ) = @_;
+
+ editPage( $title, getPageContents( $title ), "Refreshing page" );
}
# Move page
-sub movePage
-{
+sub movePage {
# Get parameters
- my ($server, $title, $newTitle, $moveReason) = @_;
-
+ my ( $server, $title, $newTitle, $moveReason ) = @_;
+
print "Moving page $title to $newTitle...\n";
-
+
# Get the delete page contents
- my $movePage = http::getPage("Special:Movepage/$title");
-
+ my $movePage = http::getPage( "Special:Movepage/$title" );
+
# Get the edit token
- if ($movePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/)
- {
+ if ($movePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
my $editToken = $1;
-
+
# Send page
- if ($configure::sendPages == 1)
- {
- http::postPage("Special:Movepage", "submit",
+ if ( $configure::sendPages == 1 ) {
+ http::postPage( "Special:Movepage", "submit",
[
wpOldTitle => $title,
wpNewTitle => $newTitle,
wpReason => $moveReason,
- wpEditToken => $editToken
+ wpEditToken => $editToken,
],
);
}
@@ -342,38 +298,32 @@
}
# Delete page
-sub deletePage
-{
+sub deletePage {
# Get parameters
- my ($server, $title, $reason) = @_;
-
+ my ( $server, $title, $reason ) = @_;
+
print "Deleting page $title...\n";
-
+
# Get the delete page contents
- my $deletePage = http::getPage($title, "delete");
-
+ my $deletePage = http::getPage( $title, "delete" );
+
# Get the edit token
- if ($deletePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/)
- {
+ if ($deletePage =~ /<input type='hidden' name='wpEditToken' value="([0-9,a-f]{32})" \/>/) {
my $editToken = $1;
-
+
# Send page
- if ($configure::sendPages == 1)
- {
- http::postPage($title, "delete",
+ if ( $configure::sendPages == 1 ) {
+ http::postPage( $title, "delete",
[
wpReason => $reason,
- wpEditToken => $editToken
+ wpEditToken => $editToken,
],
);
}
print "\tDone!\n";
- }
- else
- {
+ } else {
print "Error!\nYou may not have the permission to delete pages, or the page is already deleted.\n";
}
}
-# Return a true value
-1;
+return 1;
Modified: trunk/includes/http.pm
===================================================================
--- trunk/includes/http.pm 2006-09-16 11:06:13 UTC (rev 18)
+++ trunk/includes/http.pm 2006-09-16 11:45:57 UTC (rev 19)
@@ -9,72 +9,62 @@
use LWP;
use config::configure;
-# Counters
-my ($i, $j, $k, $l, $m);
-
# The browser we use to surf; enable cookies, and use a specific user agent
-my $browser = LWP::UserAgent->new;
-$browser->cookie_jar({});
-$browser->agent("Mozilla/5.0 (compatible; Perl MediaWiki Robot)");
+my $browser = LWP::UserAgent->new();
+$browser->cookie_jar( {} );
+$browser->agent( "Mozilla/5.0 (compatible; Perl MediaWiki Robot)" );
# Build the URL of a wiki page
-sub buildPageURL
-{
+sub buildPageURL {
# Get parameters
- my ($title, $action, $get) = @_;
-
+ my ( $title, $action, $get ) = @_;
+
# Initial URL: server, script path and title
- my $server = $configure::servers{$ARGV[0]};
- my $scriptPath = $configure::scriptPaths{$ARGV[0]};
+ my $server = $configure::servers{ $ARGV[0] };
+ my $scriptPath = $configure::scriptPaths{ $ARGV[0] };
my $url = "$server$scriptPath?title=$title";
-
+
# Action
- if (defined($action))
- {
+ if ( defined( $action ) ) {
$url = "$url&action=$action";
}
-
+
# Parameters
- if (defined($get))
- {
+ if ( defined( $get ) ) {
$url = "$url&$get";
}
-
- $url; # Return
+
+ return $url;
}
# Get a wiki page, try again and again if error
-sub getPage
-{
+sub getPage {
# Get parameters
- my ($title, $action, $get) = @_;
-
- my $url = buildPageURL($title, $action, $get);
-
+ my ( $title, $action, $get ) = @_;
+
+ my $url = buildPageURL( $title, $action, $get );
+
my $result;
- do
- {
- $result = $browser->get($url);
- }
- while (!$result->is_success && $result->status_line =~ /302[\s\w]+/);
-
- $result->content; # Return
+ do {
+ $result = $browser->get( $url );
+ } while ( !$result->is_success && $result->status_line =~ /302[\s\w]+/ );
+
+ return $result->content;
}
# Post a wiki page, try again and again if error
-sub postPage
-{
+sub postPage {
# Get parameters
my ($title, $action, $post, $get) = @_;
-
- my $url = buildPageURL($title, $action, $get);
-
+
+ my $url = buildPageURL( $title, $action, $get );
+
my $result;
- do
- {
- $result = $browser->post($url, $post);
- }
- while (!$result->is_success && !$result->status_line =~ /302[\s\w]+/);
-
- $result->content; # Return
+ do {
+ $result = $browser->post( $url, $post );
+ } while ( !$result->is_success && !$result->status_line =~ /302[\s\w]+/ );
+
+ return $result->content;
}
+
+return 1;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|