#112 Visitor based on IP+UserAgent, Visits on all Hits

open
nobody
None
5
2 days ago
2006-07-11
Antoine EMERIT
No

Add IncludeUAInVisitors=1 to distinguish visitors by
host + user agent, and/or add VisitAllHosts to
calculate sessions on all hits and not only on pages.

This should give you counts close to those of other web statistics
software (WebTrends, ...), especially for AOL visitors.

*** /usr/src/awstats-6.5/wwwroot/cgi-bin/awstats.pl 2005-11-24 21:11:19.000000000 +0100
--- awstats.pl 2006-07-11 13:35:00.000000000 +0200
***************
*** 143,148 ****
--- 143,150 ----
$AuthenticatedUsersNotCaseSensitive
$Expires $UpdateStats $MigrateStats
$URLNotCaseSensitive $URLWithQuery $URLReferrerWithQuery
$DecodeUA
+ $IncludeUAInVisitors
+ $VisitAllHosts
/;
($DebugMessages, $AllowToUpdateStatsFromBrowser,
$EnableLockForUpdate, $DNSLookup,
$AllowAccessFromWebToAuthenticatedUsersOnly,
$BarHeight, $BarWidth, $CreateDirDataIfNotExists,
$KeepBackupOfHistoricFiles,
***************
*** 154,161 ****
$IncludeInternalLinksInOriginSection,
$AuthenticatedUsersNotCaseSensitive,
$Expires, $UpdateStats, $MigrateStats,
$URLNotCaseSensitive, $URLWithQuery, $URLReferrerWithQuery,
! $DecodeUA)=
!
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
use vars qw/
$DetailedReportsOnNewWindows
$FirstDayOfWeek $KeyWordsNotSensitive
$SaveDatabaseFilesWithPermissionsForEveryone
--- 156,163 ----
$IncludeInternalLinksInOriginSection,
$AuthenticatedUsersNotCaseSensitive,
$Expires, $UpdateStats, $MigrateStats,
$URLNotCaseSensitive, $URLWithQuery, $URLReferrerWithQuery,
! $DecodeUA, $IncludeUAInVisitors, $VisitAllHosts)=
!
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
use vars qw/
$DetailedReportsOnNewWindows
$FirstDayOfWeek $KeyWordsNotSensitive
$SaveDatabaseFilesWithPermissionsForEveryone
***************
*** 4958,4968 ****
--- 4960,4973 ----

------------------------------------------------------------------------------

sub ShowHostInfo {
my $host=shift;
+ $host =~ s/\+.*//;
# Call to plugins' function ShowInfoHost
foreach my $pluginname (sort keys
%{$PluginsLoaded{'ShowInfoHost'}}) {
# my $function="ShowInfoHost_$pluginname('$host')";
# eval("$function");
my $function="ShowInfoHost_$pluginname";
+ # In case we add the User Agent in the host table
+
&$function($host);
}
}


***************
*** 6163,6168 ****
--- 6168,6176 ----
# Processing log
#------------------------------------------

+ if ($IncludeUAInVisitors && $Debug) { debug(" Include User Agent in Visistor ID.",1); }
+ if ($VisitAllHosts && $Debug) { debug(" Visit all hosts.",1); }
+
if ($EnableLockForUpdate) {
    # Trap signals to remove lock
    $SIG{INT} = \&SigHandler; # 2


***************
*** 6875,6882 ****

    # Analyze: Host, URL entry+exit and Session
    #------------------------------------------

! if ($PageBool) {
! my $timehostl=$_host_l{$HostResolved};
if ($timehostl) {
# A visit for this host was already detected
# TODO everywhere there is $VISITTIMEOUT
--- 6883,6906 ----

    # Analyze: Host, URL entry+exit and Session
    #------------------------------------------

! my $UA;
! my $VisitorId;
! $UA = $UserAgent;
! $UA =~ s/ /_/g;
!
! $VisitorId = $HostResolved;
!
! if ($IncludeUAInVisitors) {
! $VisitorId .= "+" . $UA;
! if ($Debug) { debug(" Include User
Agent in Visistor ID",3); }
! }
!
! if ($VisitAllHosts) {
! if ($Debug) { debug(" This is a
second visit for $VisitorId.",4); }
! }
!
! if ($PageBool || $VisitAllHosts) {
! my $timehostl=$_host_l{$VisitorId};
if ($timehostl) {
# A visit for this host was already detected
# TODO everywhere there is $VISITTIMEOUT


***************
*** 6884,6960 ****
# if ($timerecord >
($timehostl+$VISITTIMEOUT+($dateparts[3]>$daytimehostl?$NEWDAYVISITTIMEOUT:0)))
{
if ($timerecord > ($timehostl+$VISITTIMEOUT)) {
# This is a second visit or more
! if (! $_waithost_s{$HostResolved}) {
# This is a second visit or more
# We count 'visit','exit','entry','DayVisits'
! if ($Debug) { debug(" This is a second visit
for $HostResolved.",4); }
! my $timehosts=$_host_s{$HostResolved};
! my $page=$_host_u{$HostResolved};
if ($page) { $_url_x{$page}++; }
$_url_e{$field[$pos_url]}++;
$DayVisits{$yearmonthdayrecord}++;
# We can't count session yet because we don't
have the start so
# we save params of first 'wait' session
! $_waithost_l{$HostResolved}=$timehostl;
! $_waithost_s{$HostResolved}=$timehosts;
! $_waithost_u{$HostResolved}=$page;
}
else {
# This is third visit or more
# We count
'session','visit','exit','entry','DayVisits'
! if ($Debug) { debug(" This is a third visit or
more for $HostResolved.",4); }
! my $timehosts=$_host_s{$HostResolved};
! my $page=$_host_u{$HostResolved};
if ($page) { $_url_x{$page}++; }
$_url_e{$field[$pos_url]}++;
$DayVisits{$yearmonthdayrecord}++;
if ($timehosts) {
$_session{GetSessionRange($timehosts,$timehostl)}++; }
}
# Save new session properties
! $_host_s{$HostResolved}=$timerecord;
! $_host_l{$HostResolved}=$timerecord;
! $_host_u{$HostResolved}=$field[$pos_url];
}
elsif ($timerecord > $timehostl) {
# This is a same visit we can count
! if ($Debug) { debug(" This is same visit still
running for $HostResolved. host_l/host_u changed to
$timerecord/$field[$pos_url]",4); }
! $_host_l{$HostResolved}=$timerecord;
! $_host_u{$HostResolved}=$field[$pos_url];
}
elsif ($timerecord == $timehostl) {
# This is a same visit we can count
! if ($Debug) { debug(" This is same visit still
running for $HostResolved. host_l/host_u changed to
$timerecord/$field[$pos_url]",4); }
! $_host_u{$HostResolved}=$field[$pos_url];
}
! elsif ($timerecord < $_host_s{$HostResolved}) {
# Should happens only with not correctly sorted
log files
! if ($Debug) { debug(" This is same visit still
running for $HostResolved with start not in order.
host_s changed to $timerecord (entry page also changed
if first visit)",4); }
! if (! $_waithost_s{$HostResolved}) {
! # We can reorder entry page only if it's the
first visit found in this update run (The saved entry
page was $_waithost_e if $_waithost_s{$HostResolved} is
not defined. If second visit or more, entry was
directly counted and not saved)
! $_waithost_e{$HostResolved}=$field[$pos_url];
}
else {
# We can't change entry counted as we dont't
know what was the url counted as entry
}
! $_host_s{$HostResolved}=$timerecord;
}
else {
! if ($Debug) { debug(" This is same visit still
running for $HostResolved with hit between start and
last hits. No change",4); }
}
}
else {
# This is a new visit (may be). First new visit
found for this host. We save in wait array the entry
page to count later
! if ($Debug) { debug(" New session (may be) for
$HostResolved. Save in wait array to see later",4); }
! $_waithost_e{$HostResolved}=$field[$pos_url];
# Save new session properties
! $_host_u{$HostResolved}=$field[$pos_url];
! $_host_s{$HostResolved}=$timerecord;
! $_host_l{$HostResolved}=$timerecord;
}
! $_host_p{$HostResolved}++;
}
! $_host_h{$HostResolved}++;
! $_host_k{$HostResolved}+=int($field[$pos_size]);

    # Analyze: Browser - OS
    #----------------------

--- 6908,6984 ----
# if ($timerecord >
($timehostl+$VISITTIMEOUT+($dateparts[3]>$daytimehostl?$NEWDAYVISITTIMEOUT:0)))
{
if ($timerecord > ($timehostl+$VISITTIMEOUT)) {
# This is a second visit or more
! if (! $_waithost_s{$VisitorId}) {
# This is a second visit or more
# We count 'visit','exit','entry','DayVisits'
! if ($Debug) { debug(" This is a second visit
for $VisitorId.",4); }
! my $timehosts=$_host_s{$VisitorId};
! my $page=$_host_u{$VisitorId};
if ($page) { $_url_x{$page}++; }
$_url_e{$field[$pos_url]}++;
$DayVisits{$yearmonthdayrecord}++;
# We can't count session yet because we don't
have the start so
# we save params of first 'wait' session
! $_waithost_l{$VisitorId}=$timehostl;
! $_waithost_s{$VisitorId}=$timehosts;
! $_waithost_u{$VisitorId}=$page;
}
else {
# This is third visit or more
# We count
'session','visit','exit','entry','DayVisits'
! if ($Debug) { debug(" This is a third visit or
more for $VisitorId.",4); }
! my $timehosts=$_host_s{$VisitorId};
! my $page=$_host_u{$VisitorId};
if ($page) { $_url_x{$page}++; }
$_url_e{$field[$pos_url]}++;
$DayVisits{$yearmonthdayrecord}++;
if ($timehosts) {
$_session{GetSessionRange($timehosts,$timehostl)}++; }
}
# Save new session properties
! $_host_s{$VisitorId}=$timerecord;
! $_host_l{$VisitorId}=$timerecord;
! $_host_u{$VisitorId}=$field[$pos_url];
}
elsif ($timerecord > $timehostl) {
# This is a same visit we can count
! if ($Debug) { debug(" This is same visit still
running for $VisitorId. host_l/host_u changed to
$timerecord/$field[$pos_url]",4); }
! $_host_l{$VisitorId}=$timerecord;
! $_host_u{$VisitorId}=$field[$pos_url];
}
elsif ($timerecord == $timehostl) {
# This is a same visit we can count
! if ($Debug) { debug(" This is same visit still
running for $VisitorId. host_l/host_u changed to
$timerecord/$field[$pos_url]",4); }
! $_host_u{$VisitorId}=$field[$pos_url];
}
! elsif ($timerecord < $_host_s{$VisitorId}) {
# Should happens only with not correctly sorted
log files
! if ($Debug) { debug(" This is same visit still
running for $VisitorId with start not in order. host_s
changed to $timerecord (entry page also changed if
first visit)",4); }
! if (! $_waithost_s{$VisitorId}) {
! # We can reorder entry page only if it's the
first visit found in this update run (The saved entry
page was $_waithost_e if $_waithost_s{$VisitorId} is
not defined. If second visit or more, entry was
directly counted and not saved)
! $_waithost_e{$VisitorId}=$field[$pos_url];
}
else {
# We can't change entry counted as we dont't
know what was the url counted as entry
}
! $_host_s{$VisitorId}=$timerecord;
}
else {
! if ($Debug) { debug(" This is same visit still
running for $VisitorId with hit between start and last
hits. No change",4); }
}
}
else {
# This is a new visit (may be). First new visit
found for this host. We save in wait array the entry
page to count later
! if ($Debug) { debug(" New session (may be) for
$VisitorId. Save in wait array to see later",4); }
! $_waithost_e{$VisitorId}=$field[$pos_url];
# Save new session properties
! $_host_u{$VisitorId}=$field[$pos_url];
! $_host_s{$VisitorId}=$timerecord;
! $_host_l{$VisitorId}=$timerecord;
}
! $_host_p{$VisitorId}++;
}
! $_host_h{$VisitorId}++;
! $_host_k{$VisitorId}+=int($field[$pos_size]);

    # Analyze: Browser - OS
    #----------------------

***************
*** 8066,8071 ****
--- 8090,8096 ----
if ($HTMLOutput{'lasthosts'}) {
&BuildKeyList($MaxRowsInHTMLOutput,$MinHit{'Host'},\%_host_h,\%_host_l);
}
foreach my $key (@keylist) {
my $host=CleanFromCSSA($key);
+ $host =~ s/\+.*//; # remove User Agent (if any)
print "".($_robot_l{$key}?'':'')."$host".($_robot_l{$key}?'':'')."";
&ShowHostInfo($key);
if ($ShowHostsStats =~ /P/i) { print
"".($_host_p{$key}?$_host_p{$key}:" ").""; }
***************
*** 8109,8114 ****
--- 8134,8140 ----

&BuildKeyList($MaxRowsInHTMLOutput,$MinHit{'Host'},\%_host_h,\%_host_p);
foreach my $key (@keylist) {
my $host=CleanFromCSSA($key);
+ $host =~ s/\+.*//; # remove User Agent (if any)
print "$host";
&ShowHostInfo($key);
if ($ShowHostsStats =~ /P/i) { print
"".($_host_p{$key}?$_host_p{$key}:" ").""; }


***************
*** 9616,9625 ****
print "\n";
$total_p=$total_h=$total_k=0;
my $count=0;

&BuildKeyList($MaxNbOf{'HostsShown'},$MinHit{'Host'},\%_host_h,\%_host_p);
foreach my $key (@keylist) {
print "";
! print "$key";
&ShowHostInfo($key);
if ($ShowHostsStats =~ /P/i) { print
''.($_host_p{$key}||" ").''; }
if ($ShowHostsStats =~ /H/i) { print
"$_host_h{$key}"; }
--- 9642,9654 ----
print "\n";
$total_p=$total_h=$total_k=0;
my $count=0;
+ my $host;

&BuildKeyList($MaxNbOf{'HostsShown'},$MinHit{'Host'},\%_host_h,\%_host_p);
foreach my $key (@keylist) {
print "";
! $host = $key;
! $host =~ s/\+.*//;
! print "$host";
&ShowHostInfo($key);
if ($ShowHostsStats =~ /P/i) { print
''.($_host_p{$key}||" ").''; }
if ($ShowHostsStats =~ /H/i) { print
"$_host_h{$key}"; }

Discussion

  • Antoine EMERIT
    Antoine EMERIT
    2006-07-11

    The patch in a contextual diff