From: <no...@so...> - 2010-12-11 15:40:37
|
Update of /cvsroot/ijbswa/current/tools In directory sfp-cvsdas-4.v30.ch3.sourceforge.com:/tmp/cvs-serv19093/tools Modified Files: privoxy-log-parser.pl Log Message: Implement a --unbreak-lines-only option to try to revert MUA breakage. Index: privoxy-log-parser.pl =================================================================== RCS file: /cvsroot/ijbswa/current/tools/privoxy-log-parser.pl,v retrieving revision 1.106 retrieving revision 1.107 diff -u -d -r1.106 -r1.107 --- privoxy-log-parser.pl 11 Dec 2010 15:36:47 -0000 1.106 +++ privoxy-log-parser.pl 11 Dec 2010 15:40:29 -0000 1.107 @@ -61,6 +61,7 @@ CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0, CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0, CLI_OPTION_STATISTICS => 0, + CLI_OPTION_UNBREAK_LINES_ONLY => 0, CLI_OPTION_URL_STATISTICS_THRESHOLD => 0, CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0, @@ -2282,6 +2283,27 @@ } +sub unbreak_lines_only_loop() { + my $log_messages_reached = 0; + while (<>) { + chomp; + + # Log level other than LOG_LEVEL_CLF? + if (m/^(\w{3} \d{2}) (\d\d:\d\d:\d\d)\.?(\d+)? (?:Privoxy\()?([^\)\s]*)[\)]? ([\w -]*): (.*?)\r?$/ or + # LOG_LEVEL_CLF? 
+ m/^((?:\d+\.\d+\.\d+\.\d+|[:\d]+)) - - \[(.*)\] "(.*)" (\d+) (\d+)/) { $log_messages_reached = 1; print "\n"; + } else { # Wrapped message + } + s@<BR>$@@; + print; + print "\n" unless $log_messages_reached; + } +} + sub VersionMessage { my $version_message; @@ -2305,6 +2327,7 @@ 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES, 'statistics' => CLI_OPTION_STATISTICS, 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'unbreak-lines-only' => CLI_OPTION_UNBREAK_LINES_ONLY, 'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD, ); @@ -2318,6 +2341,7 @@ 'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'}, 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, + 'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'}, 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, @@ -2348,6 +2372,7 @@ [--shorten-thread-ids] [--show-ineffective-filters] [--statistics] + [--unbreak-lines-only] [--url-statistics-threshold $cli_options{'url-statistics-threshold'}] [--title $cli_options{'title'}] [--version] @@ -2368,7 +2393,10 @@ print_intro(); - if (cli_option_is_set('statistics')) { + # XXX: should explicitly reject incompatible argument combinations + if (cli_option_is_set('unbreak-lines-only')) { + unbreak_lines_only_loop(); + } elsif (cli_option_is_set('statistics')) { stats_loop(); } else { parse_loop(); @@ -2443,6 +2471,12 @@ they very well might be. Also note that the results are pretty much guaranteed to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync. +[B<--unbreak-lines-only>] Tries to fix lines that got messed up by a broken or +interestingly configured mail client and thus are no longer recognized properly. 
+Only fixes some breakage, but may be good enough or at least better than nothing. +Doesn't do anything else, so you probably want to pipe the output into +B<privoxy-log-parser> again. + [B<--url-statistics-threshold>] Only show the request count for a resource if it's above or equal to the given threshold. If the threshold is 0, URL statistics are disabled. |