From: Geert J. <gj...@us...> - 2002-09-03 19:40:51
|
Update of /cvsroot/woc/woc/src/woc/cgi-src/stats In directory usw-pr-cvs1:/tmp/cvs-serv23171/woc/cgi-src/stats Added Files: README.STATS data-stats.pl page-stats-automatic.pl tm-stats.pl Log Message: --- NEW FILE: README.STATS --- This is the README belonging to 'page-stats', a Perl-program by Mark Koenen (ma...@sc...) that 'cleverly' checks how often a WWW-page has been accessed. How does it work? First 'page-stats' reads in an ident-file. After that it reads the logfile just once, counts the page-accesses and then produces the HTML-document, using a header- and footer-file. What does the ident-file look like? In an ident-file you'll be able to give 'identifiers' for a certain page: one page can have more than one representative in a log-file. Therefore the ident-file has the following structure: URL@DESCRIPTION@IDENT1@...@IDENTn Where: URL = The URL to the page you want to look at DESCRIPTION = The description of the page IDENT1, ..., IDENTn = An identifier of the page An example. When you want statistics of your homepage you might want to begin your ident-file with: /index.html@My Homepage@/@/index.html The header- and footer-files: Naming your ident-file 'foo.ident' you'll have to create 'foo.header' and 'foo.footer' too. foo.header will be prepended to the page-statistics and foo.footer will be appended to the page-statistics. The three variables: When creating foo.header and foo.footer you can use three variables: $firstrequest $lastrequest $date Where: $firstrequest = date + time of first request in logfile $lastrequest = date + time of last request in logfile $date = current date + time Calling page-stats: After having set up your ident-, header- and footer-file it's time to let page-stats do what it's supposed to do. 
You can use page-stats in the following way: page-stats.pl [-i <ident-file>] [-l <logfile>] Where: ident-file = name of your ident-file (default: 'page-stats.ident') logfile = name of your logfile (default: '/usr/local/httpd/logs/access_log') If everything works out fine you'll find a new file called 'foo.html' in your directory. Where can you get it? The latest version is on: http://www.sci.kun.nl/thalia/guide/ You can see a working script at: http://www.sci.kun.nl/thalia/page-stats/ Troubleshooting: If the program didn't do a damn thing you might want to check if the path to Perl is correct: by default it's set to '/usr/local/bin/perl'. You can find out if this is a correct path by typing 'which perl' at your UNIX-prompt. If perl is in another directory than '/usr/local/bin/' you'll have to edit the first line of 'page-stats.pl'. If it _still_ doesn't work: too bad, go find something else to do :-)
--- NEW FILE: data-stats.pl ---
#!/usr/local/bin/perl -w
#
# data-stats.pl -- CGI script that prints an HTML page with counts of the
# data held by the dictionary: items, item classes, words per language and
# the structure/data files below $root.
#
# File counts are taken with glob()/File::Find and line counts by reading
# the XML files once, instead of spawning an "ls | wc" or "grep | wc"
# pipeline per number. Missing directories and unreadable files simply
# contribute 0.
use strict;
use File::Find;

my $root = "../..";
my %numberof = ();

# Return the number of files matching a shell-style glob pattern
# (0 when nothing matches).
sub count_files {
    my ($pattern) = @_;
    my @matches = glob($pattern);
    return scalar @matches;
}

# Return the number of entries below $dir, at any depth, whose name
# matches $name_regex (0 when $dir does not exist).
sub count_files_below {
    my ($dir, $name_regex) = @_;
    return 0 unless -d $dir;
    my $count = 0;
    find(sub { $count++ if $_ =~ $name_regex }, $dir);
    return $count;
}

# --- items and words: count matching lines in the word XML files ---

my @wml_files = glob("$root/data/wml/*.xml");

# Statistic name => pattern; a line is counted once per pattern it matches.
my %line_pattern = (
    "chemicals"    => qr/CLASS="CHEMICAL"/i,
    "mixtures"     => qr/CLASS="MIXTURE"/i,
    "polymers"     => qr/CLASS="POLYMER"/i,
    "reactions"    => qr/CLASS="REACTION"/i,
    "groups"       => qr/CLASS="GROUP"/i,
    "nomenclatuur" => qr/CLASS="NOMENCLATURE"/i,
    "afkortingen"  => qr/ABBREV/i,
    "nederlands"   => qr/ID="NL"/i,
    "engels"       => qr/ID="EN"/i,
    "duits"        => qr/ID="DE"/i,
    "frans"        => qr/ID="FR"/i,
);

foreach my $key (keys %line_pattern) {
    $numberof{$key} = 0;
}

foreach my $file (@wml_files) {
    my $fh;
    unless (open($fh, '<', $file)) {
        warn "data-stats: cannot read $file: $!\n";
        next;
    }
    while (my $line = <$fh>) {
        foreach my $key (keys %line_pattern) {
            $numberof{$key}++ if $line =~ $line_pattern{$key};
        }
    }
    close($fh);
}

$numberof{"items"} = count_files("$root/web-docs/gui/items/*.shtml");

# Everything that is not in one of the known classes.
$numberof{"overigen"} = $numberof{"items"}
                      - $numberof{"chemicals"}
                      - $numberof{"mixtures"}
                      - $numberof{"polymers"}
                      - $numberof{"reactions"}
                      - $numberof{"groups"}
                      - $numberof{"nomenclatuur"};

$numberof{"linguistisch"} = $numberof{"afkortingen"}
                          + $numberof{"nederlands"}
                          + $numberof{"engels"}
                          + $numberof{"duits"}
                          + $numberof{"frans"};

# --- structure and data files ---

$numberof{"2d-cml"} = count_files("$root/data/dadml/2d/cml/*.cml");
$numberof{"2d-gif"} = count_files("$root/data/dadml/2d/gif/*.gif");
$numberof{"2d-mol"} = count_files("$root/data/dadml/2d/mol/*.mol");
$numberof{"2d"}     = $numberof{"2d-cml"} + $numberof{"2d-gif"} + $numberof{"2d-mol"};

$numberof{"3d-cml"}  = count_files("$root/data/dadml/3d/cml/*.cml");
$numberof{"3d-mol"}  = count_files("$root/data/dadml/3d/mol/*.mol");
$numberof{"3d-pdb"}  = count_files("$root/data/dadml/3d/pdb/*.pdb");
$numberof{"3d-vrml"} = count_files("$root/data/dadml/3d/vrml/*.vrml");
$numberof{"3d-xyz"}  = count_files("$root/data/dadml/3d/xyz/*.xyz");
$numberof{"3d"}      = $numberof{"3d-cml"} + $numberof{"3d-mol"} + $numberof{"3d-pdb"}
                     + $numberof{"3d-vrml"} + $numberof{"3d-xyz"};

$numberof{"fys-cml"} = count_files("$root/data/dadml/fysisch/cml/*.xml");
$numberof{"fysisch"} = $numberof{"fys-cml"};

$numberof{"dadml"} = $numberof{"2d"} + $numberof{"3d"} + $numberof{"fysisch"};

$numberof{"pictures"}   = count_files_below("$root/data/pictures", qr/\.gif\z/);
$numberof{"pict-homos"} = count_files("$root/data/pictures/homos/*.gif");
$numberof{"pict-lumos"} = count_files("$root/data/pictures/lumos/*.gif");

$numberof{"xml"}        = scalar @wml_files;
$numberof{"cml-totaal"} = $numberof{"2d-cml"} + $numberof{"3d-cml"} + $numberof{"fys-cml"};
$numberof{"mol-totaal"} = $numberof{"2d-mol"} + $numberof{"3d-mol"};
$numberof{"gif-totaal"} = $numberof{"2d-gif"} + $numberof{"pictures"};
$numberof{"database"}   = $numberof{"xml"} + $numberof{"dadml"};

# The blank line after the Content-type header is required: it ends the
# CGI response headers.
print <<EOT;
Content-type: text/html

<HTML>
<HEAD>
<TITLE>Woordenboek Organische Chemie</TITLE>
<BASE HREF="http://www-woc.sci.kun.nl/">
<LINK REL="stylesheet" href="gui/styles/woc.css" TYPE="text/css">
<SCRIPT SRC="gui/javascript/isframe.js"></SCRIPT>
<SCRIPT SRC="gui/javascript/location.js"></SCRIPT>
<SCRIPT>
<!--
setLocation("Zoekresultaten");
// -->
</SCRIPT>
</HEAD>
<BODY CLASS="main">
<H1>Data Statistieken</H1>
<HR>
<P>Dit woordenboek bevat...
<UL>
<P>$numberof{"items"} items :
<UL>
$numberof{"chemicals"} chemicaliën<BR>
$numberof{"mixtures"} mengsels<BR>
$numberof{"polymers"} polymeren<BR>
$numberof{"reactions"} reacties<BR>
$numberof{"groups"} groepen van woorden<BR>
$numberof{"nomenclatuur"} nomenclatuur-regels<BR>
$numberof{"overigen"} overigen
</UL>
<P>$numberof{"linguistisch"} woorden :
<UL>
$numberof{"afkortingen"} afkortingen<BR>
$numberof{"nederlands"} Nederlandstalige woorden<BR>
$numberof{"engels"} Engelstalige vertalingen<BR>
$numberof{"duits"} Duitstalige vertalingen<BR>
$numberof{"frans"} Franstalige vertalingen
</UL>
<P>$numberof{"database"} gegevensbestanden :
<UL>
<P><I>woorden :</I></P>
$numberof{"xml"} XML-bestanden<BR>
<P><I>2D-structuren :</I></P>
$numberof{"2d-cml"} CML-bestanden<BR>
$numberof{"2d-gif"} GIF-bestanden<BR>
$numberof{"2d-mol"} (MDL) MOL-bestanden
<P><I>3D-structuren :</I></P>
$numberof{"3d-cml"} CML-bestanden<BR>
$numberof{"3d-mol"} (MDL) MOL-bestanden<BR>
$numberof{"3d-pdb"} PDB-bestanden<BR>
$numberof{"3d-vrml"} VRML-bestanden<BR>
$numberof{"3d-xyz"} XYZ-bestanden
<P><I>fysische gegevensbestanden :</I></P>
$numberof{"fys-cml"} CML-bestanden
</UL>
<!--
$numberof{"pictures"} plaatjes:
<UL>
$numberof{"pict-homos"} HOMO<BR>
$numberof{"pict-lumos"} LUMO orbitalen
</UL-->
</UL>
<P><HR>
<CENTER>
U bent de
<!--WEBBOT bot="HTMLMarkup" startspan ALT="Site Meter" -->
<script>var site="woccounter"</script>
<script language="JavaScript1.2" src="http://www.sitemeter.com/js/counter.js?site=woccounter">
</script>
<noscript>
<a href="http://www.sitemeter.com/stats.asp?site=woccounter" target="_top">
<img src="http://www.sitemeter.com/meter.asp?site=woccounter" alt="Site Meter" nosave="" border="0" /></a>
</noscript>
<script>document.write("//"+"-"+"-"+">");</script>
<!-- Copyright 1999 Site Meter -->
<!--WEBBOT bot="HTMLMarkup" Endspan -->ste bezoeker.
</CENTER>
<hr />
<p>
</BODY>
</HTML>
EOT
--- NEW FILE: page-stats-automatic.pl ---
#!/usr/local/bin/perl -w
#
# page-stats-automatic.pl -- CGI variant of 'page-stats'.
#
# Reads the ident-file (URL@DESCRIPTION@IDENT1@...@IDENTn per line), reads
# the access log once, counts the requests per page and prints an HTML
# table, wrapped in the header- and footer-file that belong to the
# ident-file ('foo.ident' -> 'foo.header', 'foo.footer').
#
# Switches (first argument):
#   week...    use the weekly logfile instead of the default one
#   -debug     echo the ident-file entries while they are read
use strict;
use POSIX qw(strftime);

# Just some defaults:
my $logfile     = "/vol/www/sun4irc/logs/access/rest_log";
my $weeklogfile = "/vol/www/sun4irc/httpd/logs/access_log";
my $identfile   = "data/woc.ident";
my $debug       = 0;

# Process switches:
my $switch = defined $ARGV[0] ? $ARGV[0] : "";
if ($switch =~ /^week/i) {
    $logfile = $weeklogfile;
}
elsif ($switch =~ /^-debug/i) {
    $debug = 1;
}

# Without the important files there is nothing to report. As before,
# nothing is printed in that case; the reason now ends up in the error log.
foreach my $needed ($logfile, $identfile) {
    unless (-r $needed) {
        warn "page-stats: cannot read '$needed'\n";
        exit 0;
    }
}

# Let's determine the other filenames:
my $strippedfile = substr($identfile, 0, rindex($identfile, "."));
my $headerfile   = $strippedfile . ".header";
my $footerfile   = $strippedfile . ".footer";

# Calculate some variables:
my ($firstline, $lastline) = first_and_last_line($logfile);
my $firstrequest = request_time($firstline);
my $lastrequest  = request_time($lastline);
my $date         = strftime('%H:%Mh %d-%m-%y', localtime);

# Write HEADER-file to HTML-file. It is done here, before the logfile is
# read, to show that the page is loading...
print "Content-type: text/html\n\n";
unless (print_template($headerfile,
                       firstrequest => $firstrequest,
                       lastrequest  => $lastrequest,
                       date         => $date)) {
    print "<HTML>\n<HEAD>\n<TITLE>Page-statistics</TITLE>\n";
    print "</HEAD>\n<BODY>\n";
}

my @pages = read_idents($identfile, $debug);
count_requests($logfile, \@pages);

# Write page-stats to HTML-file: most requested page first, pages that
# were never requested are left out.
print "<CENTER><TABLE>\n";
my $sum = 0;
my @rows = ();
foreach my $page (@pages) {
    my $count = $page->{count};
    $sum += $count;
    next if $count == 0;
    my $html = "<TR>"
             . "<TD ALIGN=Left><A HREF=\"$page->{url}\">$page->{description}</A></TD>"
             . "<TD>$count</TD>"
             . "<TD ALIGN=\"Left\"><IMG SRC=\"/sigma/gifs/icon/stats_bar.gif\""
             . " WIDTH=\"$count\" HEIGHT=\"10\"></TD>"
             . "</TR>\n";
    push(@rows, { count => $count, html => $html });
}
# Numeric sort on the count; ties fall back to reverse string order of the
# row, which is what the old string-based sort did.
foreach my $row (sort { $b->{count} <=> $a->{count} or $b->{html} cmp $a->{html} } @rows) {
    print $row->{html};
}
print "</TABLE></CENTER><p>\n";

# Write FOOTER-file to HTML-file
unless (print_template($footerfile,
                       sum          => $sum,
                       firstrequest => $firstrequest,
                       lastrequest  => $lastrequest,
                       date         => $date)) {
    print "<HR>\n<EM>Page was generated on $date</EM>\n";
    print <<EOT;
<CENTER>
U bent de
<!--WEBBOT bot="HTMLMarkup" startspan ALT="Site Meter" -->
<script>var site="woccounter"</script>
<script language="JavaScript1.2" src="http://www.sitemeter.com/js/counter.js?site=woccounter">
</script>
<noscript>
<a href="http://www.sitemeter.com/stats.asp?site=woccounter" target="_top">
<img src="http://www.sitemeter.com/meter.asp?site=woccounter" alt="Site Meter" nosave="" border="0" /></a>
</noscript>
<script>document.write("//"+"-"+"-"+">");</script>
<!-- Copyright 1999 Site Meter -->
<!--WEBBOT bot="HTMLMarkup" Endspan -->ste bezoeker.
</CENTER>
<hr />
EOT
    print "</BODY>\n</HTML>\n";
}
print "\n";

# ----------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------

# Return the first and the last line of $file (both "" when the file is
# empty or cannot be opened). Only the last 64 KB are scanned for the last
# line, so the whole logfile is not read an extra time.
sub first_and_last_line {
    my ($file) = @_;
    my ($first, $last) = ("", "");
    my $fh;
    open($fh, '<', $file) or return ($first, $last);
    my $line = <$fh>;
    $first = $line if defined $line;
    my $size  = (-s $fh) || 0;
    my $chunk = 65536;
    seek($fh, ($size > $chunk ? $size - $chunk : 0), 0);
    while (defined($line = <$fh>)) {
        $last = $line;
    }
    close($fh);
    return ($first, $last);
}

# Reduce a log line to the text between '[' and the next whitespace (the
# date + time of the request). A line without such a part is returned as
# it is, minus the newline.
sub request_time {
    my ($line) = @_;
    $line =~ s/^.*\[(\S*)\s.*$/$1/;
    chomp($line);
    return $line;
}

# Print $file with the given variables filled in. The variables are passed
# as name => value pairs and every '$name' in the file (in any case) is
# replaced by its value, in the order given. Returns 1 when the file was
# printed and 0 when it could not be opened, so that the caller can print
# a default instead.
sub print_template {
    my ($file, @variables) = @_;
    my $fh;
    open($fh, '<', $file) or return 0;
    while (my $line = <$fh>) {
        for (my $i = 0; $i < @variables; $i += 2) {
            my $pattern = '\$' . quotemeta($variables[$i]);
            my $value   = $variables[$i + 1];
            $line =~ s/$pattern/$value/gi;
        }
        print $line;
    }
    close($fh);
    return 1;
}

# Read the identifiers-file. Returns one hash per line with the keys:
#   count       number of requests counted so far (starts at 0)
#   url         URL the table row links to
#   description text of the link
#   exact       hash with the identifiers that must match a URL exactly
#   wildcards   list of strings, taken from the identifiers that contain
#               a '*', that only have to occur somewhere in a URL
# Dies when the file cannot be opened.
sub read_idents {
    my ($file, $debug) = @_;
    my @pages = ();
    my $fh;
    open($fh, '<', $file) or die "page-stats: cannot open '$file': $!\n";
    while (my $line = <$fh>) {
        print "in:$line" if $debug;
        chomp($line);
        next unless $line =~ /\S/;
        my ($url, $description, @idents) = split(/\@/, $line);
        $description = "" unless defined $description;
        $description =~ s/^\s+//;
        my %exact     = ();
        my @wildcards = ();
        foreach my $ident (@idents) {
            $ident =~ s/^\s+//;
            $ident =~ s/\s+$//;
            next if $ident eq "";
            $exact{$ident} = 1;
            if (index($ident, "*") != -1) {
                # Drop the (last) '*'; what remains has to occur in the URL.
                (my $part = $ident) =~ s/(.*)\*/$1/;
                $part =~ s/\s//g;
                push(@wildcards, $part);
            }
        }
        push(@pages, {
            count       => 0,
            url         => $url,
            description => $description,
            exact       => \%exact,
            wildcards   => \@wildcards,
        });
        print "half:$url\@$description\@" . join("\@", @idents) . "\n" if $debug;
    }
    close($fh) or die "page-stats: cannot close '$file': $!\n";
    return @pages;
}

# Read the logfile and increase the counter of every page in @$pages that
# the requested URL belongs to. A page is counted at most once per log
# line: either because one of its identifiers equals the URL, or because
# one of its wildcard strings occurs in the URL. Lines that are not a GET
# or POST request are skipped. Dies when the logfile cannot be opened.
sub count_requests {
    my ($file, $pages) = @_;
    my $fh;
    open($fh, '<', $file) or die "page-stats: cannot open '$file': $!\n";
    while (my $line = <$fh>) {
        # get url from log-line
        next unless $line =~ /^.*"(?:POST|GET)\s+(\S+)\s/i;
        my $url = $1;

        # I don't know what this is for, but because of it the arguments
        # of scripts are not evaluated...
        # Geert, 2 Feb. 1999
        my $plus = index($url, "+");
        $url = substr($url, 0, $plus + 1) if $plus != -1;

        foreach my $page (@$pages) {
            if (exists $page->{exact}{$url}) {
                $page->{count}++;
                next;
            }
            foreach my $part (@{ $page->{wildcards} }) {
                if (index($url, $part) != -1) {
                    $page->{count}++;
                    last;
                }
            }
        }
    }
    close($fh) or die "page-stats: cannot close '$file': $!\n";
    return;
}
--- NEW FILE: tm-stats.pl ---
#!/usr/local/bin/perl -w
#
# tm-stats.pl -- CGI script that prints an HTML page with topic map
# statistics: the number of association types defined in the topic map
# file, and how often topics, associations and each association id occur
# in the XML files below $root/data/wml.
#
# The XML files are read once in Perl; no shell commands are run.
use strict;
use File::Find;

my $root = "../..";
my $topicmap = "$root/data/topicmap/associations.topicmap";
my %assocs = ();
my %numberof = ();

# read associations: every topic id is paired with the basename that
# follows it
my $tm;
if (open($tm, '<', $topicmap)) {
    my $id = "";
    my $name = "";
    while (my $line = <$tm>) {
        # [^"]* instead of .* so that further attributes on the same line
        # do not end up in the id
        if ($line =~ /topic\s*id="([^"]*)"/i) {
            $id = $1;
        }
        elsif ($line =~ /basename>(.*?)<\/basename/i) {
            $name = $1;
        }
        if (($id ne "") && ($name ne "")) {
            $assocs{$id} = $name;
            $id = "";
            $name = "";
        }
    }
    close($tm);
}
else {
    # Keep going: the page then reports 0 association types.
    warn "tm-stats: cannot read $topicmap: $!\n";
}

# calc stats
my @files = ();
if (-d "$root/data/wml") {
    find(sub { push(@files, $File::Find::name) if /\.xml\z/ }, "$root/data/wml");
}

# Lower-cased ids, for the case-insensitive search below.
my %needle = map { $_ => lc($_) } keys %assocs;
my %usage  = map { $_ => 0 } keys %assocs;
$numberof{"topicmaps"} = 0;
$numberof{"assocs"}    = 0;

# Count the lines that contain each search string; a line counts once per
# string, however often the string occurs in it.
foreach my $file (@files) {
    my $fh;
    unless (open($fh, '<', $file)) {
        warn "tm-stats: cannot read $file: $!\n";
        next;
    }
    while (my $line = <$fh>) {
        my $lower = lc($line);
        $numberof{"topicmaps"}++ if index($lower, "<tm:topic") != -1;
        $numberof{"assocs"}++    if index($lower, "<tm:assoc") != -1;
        foreach my $associd (keys %needle) {
            $usage{$associd}++ if index($lower, $needle{$associd}) != -1;
        }
    }
    close($fh);
}
$numberof{"assoctypes"} = scalar keys %assocs;

# The blank line after the Content-type header is required: it ends the
# CGI response headers.
print <<EOT;
Content-type: text/html

<HTML>
<HEAD>
<TITLE>Woordenboek Organische Chemie</TITLE>
<BASE HREF="http://www-woc.sci.kun.nl/">
<LINK REL="stylesheet" href="gui/styles/woc.css" TYPE="text/css">
<SCRIPT SRC="gui/javascript/isframe.js"></SCRIPT>
<SCRIPT SRC="gui/javascript/location.js"></SCRIPT>
<SCRIPT>
<!--
setLocation("TopicMap Stats");
// -->
</SCRIPT>
</HEAD>
<BODY CLASS="main">
<H1>Data Statistieken: Topic Map</H1>
<HR>
<P>Dit woordenboek bevat...
<UL>
<P>$numberof{"assoctypes"} topic maps gedefinieerd:
<UL>
<P><I>gebruikt:</I></P>
<UL>
$numberof{"topicmaps"} topic maps<BR>
$numberof{"assocs"} associaties<BR>
</UL>
<P><I>naar associatie:</I></P>
<UL>
EOT

# Sorted, so that the page looks the same on every request.
foreach my $associd (sort keys %assocs) {
    my $aantal = $usage{$associd};
    if ($aantal > 0) {
        print " $aantal x $assocs{$associd}<BR>\n";
    }
}

print <<EOT;
</UL>
</UL>
</UL>
<P><HR>
<CENTER>
U bent de
<!--WEBBOT bot="HTMLMarkup" startspan ALT="Site Meter" -->
<script>var site="woccounter"</script>
<script language="JavaScript1.2" src="http://www.sitemeter.com/js/counter.js?site=woccounter">
</script>
<noscript>
<a href="http://www.sitemeter.com/stats.asp?site=woccounter" target="_top">
<img src="http://www.sitemeter.com/meter.asp?site=woccounter" alt="Site Meter" nosave="" border="0" /></a>
</noscript>
<script>document.write("//"+"-"+"-"+">");</script>
<!-- Copyright 1999 Site Meter -->
<!--WEBBOT bot="HTMLMarkup" Endspan -->ste bezoeker.
</CENTER>
<hr />
<p>
</BODY>
</HTML>
EOT
|