#!/usr/bin/perl
# Update script for the awstats detail plugin.
# It would be nice to find a way to add this to the plugin itself,
# so the awstats update process would include it.
# Currently only supports the combined log format.
# Written by John Hanely (with parts copied from awstats.pl)
# Distributed under the same terms and license as AWstats

use strict;no strict "refs";
use DBI;
use Date::Manip;
# Set up variables so detail.pm doesn't choke
use vars qw/
$AWScript
%BrowsersHashIDLib
%BrowsersHashIcon
@BrowsersSearchIDOrder
$color_TableBGRowTitle
$color_TableBGTitle
$color_h
$color_k
$color_p
$color_v
$DIR
$DirData
$DirIcons
$FileConfig
$FileSuffix
$FoundNotPageList
@HostAliases
%ValidHTTPCodes
%ValidSMTPCodes
$LogType
@Message
%MonthNumLib
$MonthRequired
%NotPageList
%OSHashID
%OSHashLib
@OSSearchIDOrder
$PROG
$QueryString
%RobotsHashIDLib
@RobotsSearchIDOrder
@RobotsSearchIDOrder_list1
@RobotsSearchIDOrder_list2
@RobotsSearchIDOrder_listgen
%SearchEnginesHashID
@SearchEnginesIDOrder
@SearchEnginesSearchIDOrder
@SearchEnginesSearchIDOrder_list1
@SearchEnginesSearchIDOrder_list2
@SearchEnginesSearchIDOrder_listgen
$ShowSummary
$SiteConfig
$SiteDomain
%TmpBrowser
%TmpDNSLookup
%TmpOS
%TmpRobot
$YearRequired
/;

my $basedir = $0;
$basedir =~ s/\/[^\/]*$//;
require "$basedir/detail.pm";

$DIR = "/usr/share/webapps/awstats/6.7-r2/hostroot/cgi-bin";

&Read_Ref_Data("browsers", "operating_systems", "robots");

################################
# Ganked list init stuff
################################
	my $LevelForRobotsDetection = 3;
	my $LevelForSearchEnginesDetection = 3;
	my @list;
	# Init RobotsSearchIDOrder required for update process
	@list=();
	if ($LevelForRobotsDetection >= 1) {
		foreach (1..$LevelForRobotsDetection) { push @list,"list$_"; }
		push @list,"listgen";		# Always added
	}
	foreach my $key (@list) {
		push @RobotsSearchIDOrder,@{"RobotsSearchIDOrder_$key"};
	}
	# Init SearchEnginesIDOrder required for update process
	@list=();
	if ($LevelForSearchEnginesDetection >= 1) {
		foreach (1..$LevelForSearchEnginesDetection) { push @list,"list$_"; }
		push @list,"listgen";		# Always added
	}
	foreach my $key (@list) {
		push @SearchEnginesSearchIDOrder,@{"SearchEnginesSearchIDOrder_$key"};
	}
################################

my $log = shift;
my $db = shift;

if(!($log && $db))
{
  print STDERR
  "Usage:  $0 logfile dbfile\n";
  exit;
}

if(-e "$db" && `file "$db"` !~ /SQLite/)
{
  print STDERR "Error: $db is not an SQLite database.\n";
  exit;
}

my @drivers = DBI->available_drivers;
if(! grep("SQLite", @drivers))
{
  print STDERR "Error: Need DBD::SQLite perl module";
  exit;
}

my $dbh = DBI->connect("dbi:SQLite:dbname=$db","","");
my @tables = $dbh->tables();
if(! grep($_ eq '"log"', @tables))
{
  $dbh->do("create table log (
  		ip varchar(20),
		user text,
		time integer,
		method text,
		url text,
		status integer,
		bytes integer,
		referer text,
		agent text,
		browser text,
		os text
		);");
}

my $good_lines = 0;
my $bad_lines = 0;
my $existing_lines = 0;

open my $fh, "<$log";
while(my $line = <$fh>)
{
  chomp $line;
  if($line =~ /([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\s+\S+\s+(\S+)\s+\[([^\]]+)\]\s+"([^"\s]+)\s+([^"]+)\s+[^"]+"\s+([0-9]+)\s+([0-9-]+)\s+"([^"]+)"\s+"([^"]+)"/)
  {
    my $date = UnixDate($3,"%s");
    my $bytes = $7;
    my $url = &DecodeEncodedString($5);
    if($bytes == "-") { $bytes = 0; }
    if($date)
    {
      my $sth = $dbh->prepare("select count(*) from log where ip='$1' and user='$2' and time=$date and method='$4' and url='$url' and status=$6 and bytes=$bytes and referer='$8' and agent='$9'");
      $sth->execute;
      my @arr = $sth->fetchrow_array;
      if($arr[0] > 0) { $existing_lines++; }
      else
      {
        $good_lines++;
	my ($browser, $os, $robot) = &analyze_agent($9);
	if($robot && $robot ne "-"){ $browser = $robot; }
        $dbh->do("insert into log values('$1','$2',$date,'$4','$url',$6,$bytes,'$8','$9', '$browser', '$os');");
      }
    }
    else { $bad_lines++; }
  }
  else { $bad_lines++; }
}
close $fh;

print "Added $good_lines lines.\n".
      "Skipped $bad_lines malformed lines.\n".
      "Skipped $existing_lines existing lines.\n";



# Shamelessly ganked from awstats.pl

#------------------------------------------------------------------------------
# Function:     Load the reference databases
# Parameters:	List of files to load
# Input:		$DIR
# Output:		Arrays and Hash tables are defined
# Return:       -
#------------------------------------------------------------------------------
sub Read_Ref_Data {
	# Check lib files in common possible directories :
	# Windows and standard package:        		"$DIR/lib" (lib in same dir than awstats.pl)
	# Debian package:                    		"/usr/share/awstats/lib"
	my @PossibleLibDir=("$DIR/lib","/usr/share/awstats/lib");
	my %FilePath=(); my %DirAddedInINC=();
	my @FileListToLoad=();
	while (my $file=shift) { push @FileListToLoad, "$file.pm"; }
	foreach my $file (@FileListToLoad) {
		foreach my $dir (@PossibleLibDir) {
			my $searchdir=$dir;
			if ($searchdir && (!($searchdir =~ /\/$/)) && (!($searchdir =~ /\\$/)) ) { $searchdir .= "/"; }
			if (! $FilePath{$file}) {	# To not load twice same file in different path
				if (-s "${searchdir}${file}") {
					$FilePath{$file}="${searchdir}${file}";
					# Note: cygwin perl 5.8 need a push + require file
					if (! $DirAddedInINC{"$dir"}) { 
						push @INC, "$dir";
						$DirAddedInINC{"$dir"}=1;
					}
					my $loadret=require "$file";
					#my $loadret=(require "$FilePath{$file}"||require "${file}");
				}
			}
		}
		if (! $FilePath{$file}) {
			my $filetext=$file; $filetext =~ s/\.pm$//; $filetext =~ s/_/ /g;
			print STDERR "Warning: Can't read file \"$file\" ($filetext detection will not work correctly).\nCheck if file is in \"".($PossibleLibDir[0])."\" directory and is readable.\n";
		}
	}
	# Sanity check (if loaded)
	if ((scalar keys %OSHashID) && @OSSearchIDOrder != scalar keys %OSHashID) { error("Not same number of records of OSSearchIDOrder (".(@OSSearchIDOrder)." entries) and OSHashID (".(scalar keys %OSHashID)." entries) in OS database. Check your file ".$FilePath{"operating_systems.pm"}); }
	if ((scalar keys %SearchEnginesHashID) && (@SearchEnginesSearchIDOrder_list1+@SearchEnginesSearchIDOrder_list2+@SearchEnginesSearchIDOrder_listgen) != scalar keys %SearchEnginesHashID) { error("Not same number of records of SearchEnginesSearchIDOrder_listx (total is ".(@SearchEnginesSearchIDOrder_list1+@SearchEnginesSearchIDOrder_list2+@SearchEnginesSearchIDOrder_listgen)." entries) and SearchEnginesHashID (".(scalar keys %SearchEnginesHashID)." entries) in Search Engines database. Check your file ".$FilePath{"search_engines.pm"}." is up to date."); }
	if ((scalar keys %BrowsersHashIDLib) && @BrowsersSearchIDOrder != (scalar keys %BrowsersHashIDLib) - 4) { error("Not same number of records of BrowsersSearchIDOrder (".(@BrowsersSearchIDOrder)." entries) and BrowsersHashIDLib (".((scalar keys %BrowsersHashIDLib) - 4)." entries without msie,netscape,firefox,svn) in Browsers database. May be you updated AWStats without updating browsers.pm file or you made changed into browsers.pm not correctly. Check your file ".$FilePath{"browsers.pm"}." is up to date."); }
	if ((scalar keys %RobotsHashIDLib) && (@RobotsSearchIDOrder_list1+@RobotsSearchIDOrder_list2+@RobotsSearchIDOrder_listgen) != (scalar keys %RobotsHashIDLib) - 1) { error("Not same number of records of RobotsSearchIDOrder_listx (total is ".(@RobotsSearchIDOrder_list1+@RobotsSearchIDOrder_list2+@RobotsSearchIDOrder_listgen)." entries) and RobotsHashIDLib (".((scalar keys %RobotsHashIDLib) - 1)." entries without 'unknown') in Robots database. Check your file ".$FilePath{"robots.pm"}." is up to date."); }
}

#------------------------------------------------------------------------------
# Function:     Read config file
# Parameters:	None or configdir to scan
# Input:        $DIR $PROG $SiteConfig
# Output:		Global variables
# Return:		-
#------------------------------------------------------------------------------
sub Read_Config {
	# Check config file in common possible directories :
	# Windows :                   				"$DIR" (same dir than awstats.pl)
	# Standard, Mandrake and Debian package :	"/etc/awstats"
	# Other possible directories :				"/usr/local/etc/awstats", "/etc"
	# FHS standard, Suse package : 				"/etc/opt/awstats"
	my $configdir=shift;
	my @PossibleConfigDir=();

	if ($configdir)
	{
		# If from CGI, overwriting of configdir is only possible if AWSTATS_ENABLE_CONFIG_DIR defined
		#if ($ENV{'GATEWAY_INTERFACE'} && ! $ENV{"AWSTATS_ENABLE_CONFIG_DIR"})
		#{
		#	error("Sorry, to allow overwriting of configdir parameter from an AWStats CGI usage, environment variable AWSTATS_ENABLE_CONFIG_DIR must be set to 1");
		#}
		#else
		#{
			@PossibleConfigDir=("$configdir");
		#}
	}
	else { @PossibleConfigDir=("$DIR","/etc/awstats","/usr/local/etc/awstats","/etc","/etc/opt/awstats"); }

	# Open config file
	$FileConfig=$FileSuffix='';
	foreach (@PossibleConfigDir) {
		my $searchdir=$_;
		if ($searchdir && $searchdir !~ /[\\\/]$/) { $searchdir .= "/"; }
		if (open(CONFIG,"$searchdir$PROG.$SiteConfig.conf")) 	{ $FileConfig="$searchdir$PROG.$SiteConfig.conf"; $FileSuffix=".$SiteConfig"; last; }
		if (open(CONFIG,"$searchdir$PROG.conf"))  				{ $FileConfig="$searchdir$PROG.conf"; $FileSuffix=''; last; }
	}
	if (! $FileConfig) { error("Couldn't open config file \"$PROG.$SiteConfig.conf\" nor \"$PROG.conf\" after searching in path \"".join(',',@PossibleConfigDir)."\": $!"); }

	# Analyze config file content and close it
	&Parse_Config( *CONFIG , 1 , $FileConfig);
	close CONFIG;
	
	# If parameter NotPageList not found, init for backward compatibility
	if (! $FoundNotPageList) {
		%NotPageList=('css'=>1,'js'=>1,'class'=>1,'gif'=>1,'jpg'=>1,'jpeg'=>1,'png'=>1,'bmp'=>1,'ico'=>1,'swf'=>1);
	}
	# If parameter ValidHTTPCodes empty, init for backward compatibility
	if (! scalar keys %ValidHTTPCodes) { $ValidHTTPCodes{"200"}=$ValidHTTPCodes{"304"}=1; }
	# If parameter ValidSMTPCodes empty, init for backward compatibility
	if (! scalar keys %ValidSMTPCodes) { $ValidSMTPCodes{"1"}=$ValidSMTPCodes{"250"}=1; }
}

#------------------------------------------------------------------------------
# Function:     Decode an precompiled regex value to a common regex value
# Parameters:   compiledregextodecode
# Input:        None
# Output:       None
# Return:		standardregex
#------------------------------------------------------------------------------
sub UnCompileRegex {
	my $ret = shift;
	if($ret =~ /\(\?[-\w]*:(.*)\)/)
	{ return $1; }
	return $ret;
}

#------------------------------------------------------------------------------
# Function:     Decode an only text string into a binary string
# Parameters:   stringtodecode
# Input:        None
# Output:       None
# Return:		decodedstring
#------------------------------------------------------------------------------
sub DecodeEncodedString {
	my $stringtodecode=shift;
	$stringtodecode =~ tr/\+/ /s;
	$stringtodecode =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/ieg;
	return $stringtodecode;
}
