From: Charles C. <ch...@ru...> - 2005-02-07 08:43:55
|
Here are two new queries that I wrote to analyse search engine activity from the SQL access log. These can be inserted into the AnalyseAccessLog.inc file that I uploaded last week - the insertion point should be just before the final "}". The queries contain a list of DNS names for search engine crawlers/spiders/whatever - but this is based only on what I saw in my sites' accesslog. Any additions are welcome. regards, Charles } elseif ($queryName=="Search Bots") { // This queries for all entries in the SQL access log table that // have a dns name that I know to be a web search engine crawler and // categorises the results into time buckets as per the list below // 0 - 1 minute - 60 // 1 - 1 hour - 3600 = 60 * 60 // 2 - 1 day - 86400 = 60 * 60 * 24 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25 $now = time(); $query = "SELECT " ."IF($now-time_stamp<60, '0 - last minute', IF($now -time_stamp<3600, '1 - 1 minute to 1 hour', IF($now-time_stamp<86400, '2 - 1 hour to 1 day', IF($now-time_stamp<604800, '3 - 1 day to 1 week', IF($now-time_stamp<2629800, '4 - 1 week to 1 month', IF($now-time_stamp<31557600, '5 - 1 month to 1 year', '6 - more than 1 year')))))) AS 'Time Scale', " ."remote_host AS 'Remote Host', " ."count(*) AS 'Access Count' " ."FROM $accesslog " ."WHERE (remote_host LIKE '%googlebot.com' " ."OR remote_host LIKE '%alexa.com' " ."OR remote_host LIKE '%inktomisearch.com' " ."OR remote_host LIKE '%msnbot.msn.com') " .($whereConditions ? 'AND '.$whereConditions : '') ."GROUP BY 'Time Scale', 'Remote Host'"; } elseif ($queryName=="Search Bot Page Hits") { // This queries for all entries in the SQL access log table that // have a dns name that I know to be a web search engine crawler and // displays the URI that was hit. // If PHPSESSID appears in the URI, just display the URI to the left of this $now = time(); $query = "SELECT " ."IF($now-time_stamp<60, '0 - last minute', IF($now -time_stamp<3600, '1 - 1 minute to 1 hour', IF($now-time_stamp<86400, '2 - 1 hour to 1 day', IF($now-time_stamp<604800, '3 - 1 day to 1 week', IF($now-time_stamp<2629800, '4 - 1 week to 1 month', IF($now-time_stamp<31557600, '5 - 1 month to 1 year', '6 - more than 1 year')))))) AS 'Time Scale', " ."remote_host AS 'Remote Host', " ."IF(instr(request_uri, 'PHPSESS')=0, request_uri, left(request_uri, instr(request_uri, 'PHPSESS')-2)) AS 'Request URI' " ."FROM $accesslog " ."WHERE (remote_host LIKE '%googlebot.com' " ."OR remote_host LIKE '%alexa.com' " ."OR remote_host LIKE '%inktomisearch.com' " ."OR remote_host LIKE '%msnbot.msn.com') " .($whereConditions ? 'AND '.$whereConditions : '') ."ORDER BY time_stamp"; |