#!/bin/bash
#
# /usr/local/sbin/localwebstats
# Copyright 2001-2002 Jonas Smedegaard <dr@jones.dk>
#
# $Id: localwebstats,v 1.40 2004-12-29 17:14:23 jonas Exp $
#
# Webstats maintenance script
#
# Example config file (/etc/local/webstats.conf).
#
# --- CUT --- CUT --- CUT ---
# #!/bin/sh
# 
# #  * Websites are in /home/<uid>/websites/<fqdn>
# #  * Apache httpd.conf has this line added:
# #	Include /etc/apache/vhosts.d
# #  * Apache use /usr/local/bin/parselog into /var/log/apache-vhosts/
# #  * /usr/lib/apache/suexec is recompiled using /usr/local/bin/make-suexec-for-home
# #  * Each webhost has apache config in /etc/apache/vhosts.d/<fqdn>
# #  * /etc/apache/vhosts.d/<fqdn> has hints about host- and domain-part of fqdn:
# #	# webstats: hostname: <hostname>
# #	# webstats: domainname: <domainname>
#
# #WEBALIZER_OPTIONS="-Q"
# 
# LOGROOT='/var/log/apache-vhosts'
# WEBROOT='/home'
# 
# function statsdir()	{ echo /home/jonas/websites/stats.$(dnsdomainname)/$3; }
# function webdirs()	{ find /etc/apache/vhosts.d/ -type f -exec egrep '^#\W*webstats:' '{}' -l ';' | xargs basename; }
# function host()		{ cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*hostname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
# function domain()	{ cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*domainname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
# 
# --- CUT --- CUT --- CUT ---
#
# TODO: Run as non-privileged user
#

# Abort the whole run as soon as an unguarded command fails.
set -o errexit

# Print the command-line synopsis on stdout and terminate with status 1.
# Takes no arguments.  $0 is quoted so that a script path containing
# whitespace is still reduced to its bare file name.
function usage()	{
	echo "Usage: $(basename "$0") init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
	echo "  If no website is given, all are attempted"
	echo "  Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
	exit 1
}

# Report a fatal error and stop.
#   $1 - text describing what went wrong
# Writes "Error: <text>" and "Exiting..." to stdout, then exits with status 1.
function exit1()	{
	printf 'Error: %s\nExiting...\n' "$1"
	exit 1
}

# When started from one of the cron directories the action is implied by that
# directory; otherwise it is the first command-line argument, which is then
# shifted away so only website names remain.
case $(dirname $0) in
    /etc/cron.daily)
	stamp=update
	;;
    /etc/cron.weekly|/etc/cron.monthly)
	stamp=ignore
	;;
    *)
	stamp=$1
	shift || usage
	;;
esac

# Validate the action.  "ignore" ends the run right away with success;
# anything unknown prints the usage text and fails.
case "$stamp" in
    ignore)
	[ $DEBUG ] && echo "Asked to ignore - exiting silently..."
	exit 0
	;;
    init|update|prelogrotate|postlogrotate)
	;;
    *)
	usage
	;;
esac

# Built-in defaults; the local config file sourced further down may replace them.
# A non-empty ROOT makes the ModLogAn steps chown their output directories to
# $user:$group and run ModLogAn through "su $user".
ROOT='1'
user='www-data'
group='www-data'

# Root of the per-website log directories, and base directory for web data.
LOGROOT='/var/log/apache-vhosts'
WEBROOT='/var/www'

# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
# Print the directory holding the generated statistics of website $3:
#   <WEBROOT>/VIRTUAL/stats.<output of dnsdomainname>/www/<WEBSITE>
# The path is emitted as one quoted word, so whitespace or glob characters in
# the arguments are passed through verbatim.
function statsdir()		{ printf '%s\n' "$2/VIRTUAL/stats.$(dnsdomainname)/www/$3"; }
# List the websites found under LOGROOT ($1), one bare name per line.
# A website is any directory directly below $1 whose name contains a dot.
# $2 (WEBROOT) and $3 (WEBSITE) are accepted like in the sibling helpers but
# are not used.
# The directory prefix is stripped with a parameter expansion: the former
# sed 's!$1!!' was single-quoted, so $1 never expanded and full paths leaked
# through to the callers.
function webdirs()		{
	local dir
	# depth limits go before the tests; GNU find warns about the other order
	find "$1" -mindepth 1 -maxdepth 1 -type d -name '*.*' | while IFS= read -r dir; do
		printf '%s\n' "${dir##*/}"
	done
}
#function logfiles()		{ $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
# Print the log data of website $3 that is fed to the "init" pass as already
# resolved data.  ($1=LOGROOT, $2=WEBROOT unused, $3=WEBSITE)
# Files directly inside $1/$3 are emitted in sorted name order: first those
# matching ????.??.00.gz (decompressed with zcat), then those matching
# ????.??.00 (plain cat).
# File names are read line by line and quoted, so paths containing whitespace
# survive intact.
function logcontentresolved()	{
	local file
	# depth limits go before the tests; GNU find warns about the other order
	find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00.gz' -type f -follow | sort |
		while IFS= read -r file; do zcat "$file"; done
	find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00' -type f -follow | sort |
		while IFS= read -r file; do cat "$file"; done
}
# Print the not yet resolved log data of website $3.
# ($1=LOGROOT, $2=WEBROOT unused, $3=WEBSITE)
# Files directly inside $1/$3 are emitted in sorted name order: first those
# matching ????.??.??.gz but not *00.gz (decompressed with zcat), then those
# matching ????.??.?? but not *00 (plain cat).
# File names are read line by line and quoted, so paths containing whitespace
# survive intact.
function logcontent()		{
	local file
	# depth limits go before the tests; GNU find warns about the other order
	find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??.gz' ! -name '*00.gz' -type f -follow | sort |
		while IFS= read -r file; do zcat "$file"; done
	find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??' ! -name '*00' -type f -follow | sort |
		while IFS= read -r file; do cat "$file"; done
}
# Print the host part of website $3, read from the file $2/VIRTUAL/$3/hostname.
# ($1=LOGROOT unused, $2=WEBROOT, $3=WEBSITE)
# If the file cannot be read, exit1 is called with an explanatory message.
# The path is quoted so a WEBROOT containing whitespace still works.
function host()			{ cat "$2/VIRTUAL/$3/hostname" || exit1 "Unable to get hostname for virtual host."; }
# Print the domain part of website $3, read from the file
# $2/VIRTUAL/$3/domainname.  ($1=LOGROOT unused, $2=WEBROOT, $3=WEBSITE)
# If the file cannot be read, exit1 is called with an explanatory message.
# The path is quoted so a WEBROOT containing whitespace still works.
function domain()		{ cat "$2/VIRTUAL/$3/domainname" || exit1 "Unable to get domainname for virtual host."; }
# Print the path of the analog config file for website $3 ($1, $2 unused).
# Emitted as one quoted word so the name is never split or glob-expanded.
function analog_cfg()		{ printf '%s\n' "/etc/analog_$3.conf"; }
# Print the path of the Report Magic config file for website $3 ($1, $2 unused).
# Emitted as one quoted word so the name is never split or glob-expanded.
function rmagic_cfg()		{ printf '%s\n' "/etc/rmagic/rmagic_$3.conf"; }
# Print the path of the Webalizer config file for website $3 ($1, $2 unused).
# Emitted as one quoted word so the name is never split or glob-expanded.
function webalizer_cfg()	{ printf '%s\n' "/etc/webalizer_$3.conf"; }
# Print the path of the ModLogAn config file for website $3 ($1, $2 unused).
# Emitted as one quoted word so the name is never split or glob-expanded.
function modlogan_cfg()		{ printf '%s\n' "/etc/modlogan/modlogan_$3.conf"; }
# Hook called before the "init" pass of a website ($1=LOGROOT, $2=WEBROOT,
# $3=WEBSITE).  The default does nothing and reports success.
function pre_init()		{ return 0; }
# Hook called after the "init" pass of a website ($1=LOGROOT, $2=WEBROOT,
# $3=WEBSITE).
# Default: when <statsdir>/../COMMON/index.html exists, copy it into the
# website's stats directory.  Best effort by design: a failed copy is ignored
# and the hook always returns 0.
# The directory is kept in a local variable so no global named "statsdir"
# leaks into the rest of the script.
function post_init()		{
	local target
	target=$(statsdir "$1" "$2" "$3")
	if [ -f "$target/../COMMON/index.html" ]; then
		cp "$target/../COMMON/index.html" "$target/" || true
	fi
	return 0
}
# Hook called before the "update" pass of a website ($1=LOGROOT, $2=WEBROOT,
# $3=WEBSITE).  The default does nothing and reports success.
function pre_update()		{ return 0; }
# Hook called after the "update" pass of a website ($1=LOGROOT, $2=WEBROOT,
# $3=WEBSITE).  The default does nothing and reports success.
function post_update()		{ :; }

# Site-specific settings: every variable and function defined above may be
# redefined in this file.  A file that cannot be sourced only triggers a
# warning; the built-in defaults then stay in effect.
LOCALCONFIG=/etc/local/webstats.conf
if ! . $LOCALCONFIG; then
	echo "WARNING: Unable to read config file $LOCALCONFIG"
fi

# Fixed settings, deliberately set after the local config has been read.

# jdresolve: binary, base options, cache database and the value handed to
# its --expiredb option.
JDRESOLVE_BIN='/usr/bin/jdresolve'
JDRESOLVE_OPTIONS='-r -t 5'
JDRESOLVE_DB='/var/cache/jdresolve/hosts.db'
JDRESOLVE_EXPIRY='48'

# Fallback resolver, used when jdresolve is not installed.
LOGRESOLVE_BIN='/usr/sbin/logresolve'

# Statistics generators; each one is only used if its binary is executable.
ANALOG_BIN='/usr/bin/analog'
RMAGIC_BIN='/usr/bin/rmagic'
WEBALIZER_BIN='/usr/bin/webalizer'
MODLOGAN_BIN='/usr/bin/modlogan'
AWSTATS_BIN='/usr/lib/cgi-bin/awstats.pl'

# Point the LogFile directive of an AWStats config at another log file.
#   $1 - AWStats config file; rewritten in place by way of "$1.tmp"
#   $2 - log file path to put after "LogFile="
# Every line starting with "LogFile=" becomes LogFile=<$2> (written without
# surrounding quotes); all other lines are kept.
# Returns non-zero if sed or the final mv fails.
function awstats_setlog()	{
	local replacement
	# "\" and "&" are special in a sed replacement and "!" is the delimiter
	# used below, so escape them to have the path inserted literally
	replacement=$(printf '%s\n' "$2" | sed -e 's/[\\&!]/\\&/g')
	sed -e "s!^\(LogFile=\).*\$!\\1$replacement!" "$1" > "$1.tmp" && mv "$1.tmp" "$1"
}

# Website-independent checks
# The roots are quoted: an unquoted empty variable turns "test -d" into a
# one-argument test, which always succeeds and would hide a missing setting.
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"
# With jdresolve installed and a cache database configured, add the database
# options for all later runs and, if an expiry value is set, run jdresolve
# once up front with --expiredb.
if [ -x "$JDRESOLVE_BIN" ] && [ -n "$JDRESOLVE_DB" ]; then
	JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS --database=$JDRESOLVE_DB --dbfirst"
	test -d "$(dirname "$JDRESOLVE_DB")" || exit1 "Cache dir for jdresolve doesn't exist"
	if [ -n "$JDRESOLVE_EXPIRY" ]; then
		# run the binary that was just checked, not whatever "jdresolve"
		# happens to be first in PATH; the options are split on purpose
		if [ $DEBUG ]; then
			"$JDRESOLVE_BIN" $JDRESOLVE_OPTIONS --expiredb="$JDRESOLVE_EXPIRY"
		else
			"$JDRESOLVE_BIN" $JDRESOLVE_OPTIONS --expiredb="$JDRESOLVE_EXPIRY" > /dev/null 2>&1
		fi
	fi
fi

# Websites to process: the ones named on the command line or, when none were
# given, every site reported by webdirs.
WEBSITES=$@
[ -n "$WEBSITES" ] || WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)

# Process each website in turn.  For every site:
#   - resolve its stats directory and the per-tool config paths, then run the
#     pre_init/pre_update hook matching the requested action
#   - on "init": wipe the stats directory, regenerate the config of every
#     installed tool and build initial stats from the already resolved logs
#   - for every action: merge the new logdata (DNS-resolved if a resolver is
#     installed) and, if there is any, update the stats of every installed tool
#   - run the post_init/post_update hook
# "prelogrotate" and "postlogrotate" get no special treatment here; they only
# run the common merge/update part.
#
# A per-run flag is appended to JDRESOLVE_OPTIONS inside the loop; remember
# the pristine value so the flag is not added once more for every website.
JDRESOLVE_OPTIONS_BASE="$JDRESOLVE_OPTIONS"
for WEBSITE in $WEBSITES; do
	STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
# FIXME	test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."

	ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
	RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
	WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
	MODLOGAN_CFG=$(modlogan_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve MODLOGAN_CFG."
	# AWStats has no *_cfg helper; its config lives at this fixed location
	AWSTATS_CFG="/etc/awstats/awstats.$WEBSITE.conf"

	if [ $stamp = "init" ]; then
		[ $DEBUG ] && echo "Execute $WEBSITE PRE_INIT"
		pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
	fi
	if [ $stamp = "update" ]; then
		[ $DEBUG ] && echo "Execute $WEBSITE PRE_UPDATE"
		pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
	fi

	HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
	DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."

	ANALOG_OPTIONS="-G +g/etc/analog.cfg +g$ANALOG_CFG +A -a"
	RMAGIC_OPTIONS=""
	WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
	MODLOGAN_OPTIONS="-c $MODLOGAN_CFG"
	# shell fragment (redirect + filter) appended to the ModLogAn command line;
	# it only works when that command line is handed to a shell (su -c / sh -c)
	MODLOGAN_TAIL="2>&1 | egrep -v '^(modlogan [\.0-9]+|.+: startup - finished|[[:space:]]*(\[\.*|[[:space:]]+[0-9]+|\.*\])+|.*unknown country code: (gbl|lcl|tld) .+|.+: No such file or directory, first run \?)$' || true if [ $? \< 2 ]"
	AWSTATS_OPTIONS="-update"
	FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config

	if [ $DEBUG ]; then
		echo "Making stats for $WEBSITE in $STATSDIR:"
		JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS_BASE -p"
		ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
		RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
		WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
		MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
		MODLOGAN_TAIL=""
		AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
	else
		JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS_BASE -n"
		ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
		RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
		WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
		MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
		MODLOGAN_TAIL="$MODLOGAN_TAIL"
		AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
	fi

	if [ $stamp = "init" ]; then
		[ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
		# ":?" aborts instead of running "rm -rf" on an empty path
		rm -rf -- "${STATSDIR:?}"
		mkdir "$STATSDIR"
	fi

	LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
	LOGDATA="$STATSDIR/rawlog_new.txt"
	LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
	touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."

#FIXME run init if folders doesn't exist
	if [ $stamp = "init" ]; then
		logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
		if [ -x $ANALOG_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $ANALOG_CFG.local

HOSTNAME	$HOST.$DOMAIN
HOSTURL		http://$HOST.$DOMAIN/
BASEURL		http://$HOST.$DOMAIN
LANGUAGE	DANISH
"\
			> $ANALOG_CFG
			[ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
			[ -d $STATSDIR/analog ] || mkdir $STATSDIR/analog
			[ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
			rm -f $STATSDIR/analog/cache.data
			$ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
		fi
		if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $RMAGIC_CFG.local

[statistics]
File_In = $STATSDIR/rmagic/report.dat
Frame_File_Out = $STATSDIR/rmagic/index.html
Language = en

[reports]
File_Out = $STATSDIR/rmagic/

[QUICK]
Rows = ALL

[navigation]
File_Out = navfile.html
"\
			> $RMAGIC_CFG
			# plain assignment: with a leading "$" the shell tried to run the
			# expanded text as a command instead of setting the variable
			[ -s $RMAGIC_CFG.local ] && RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
			[ -d $STATSDIR/rmagic ] || mkdir $STATSDIR/rmagic
			[ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
			$ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
			$RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
		fi
		if [ -x $WEBALIZER_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $WEBALIZER_CFG.local

HostName	$HOST.$DOMAIN
HideSite	*$DOMAIN
HideReferrer	$DOMAIN/
"\
			> $WEBALIZER_CFG
			[ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
			[ -d $STATSDIR/webalizer ] || mkdir $STATSDIR/webalizer
			[ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
			cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
		fi
		if [ -x $MODLOGAN_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Create/update ModLogAn config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $MODLOGAN_CFG.local

[global]
includepath = /etc/modlogan
include = modlogan.def.conf,global

loadplugin = input_clf
loadplugin = processor_web
loadplugin = output_modlogan
loadplugin = output_template

statedir=$STATSDIR/modlogan

incremental = 0
debug_level = 0
enable_resolver = 0

#read_ahead_limit = 1

var(outputdir, \$statedir)

[processor_web]
include = group.url.conf,group_exploits
include = modlogan.def.conf,processor_web

debug_searchengines = 0
debug_visits = 0

hidereferrer = \"\.${DOMAIN//./\.}/\"

[output_modlogan]
include = modlogan.def.conf, output_modlogan

hostname = $HOST.$DOMAIN

outputdir=\$outputdir

[output_template]
include = modlogan.def.conf, output_template
include = modlogan.def.conf, output_template_reports_web
include = modlogan.def.conf, output_template_menu_web

template_path = /usr/local/share/modlogan/themes/
template_name = basic

variable = HOSTNAME,$HOST.$DOMAIN
variable = CHARSET,iso-8859-1
variable = LANGUAGE,da

hostname = $HOST.$DOMAIN

outputdir=\$outputdir-test

[input_clf]
include = modlogan.def.conf,input_clf
inputfile = -
"\
			> $MODLOGAN_CFG
			[ -s $MODLOGAN_CFG.local ] && cat $MODLOGAN_CFG.local >> $MODLOGAN_CFG
			for dir in $STATSDIR/modlogan $STATSDIR/modlogan-test; do
				[ -d $dir ] || mkdir $dir
				[ $ROOT ] && chown $user:$group $dir
			done
			[ $DEBUG ] && echo "$WEBSITE: Create initial ModLogAn stats"
			if [ $ROOT ]; then
				cat $LOGDATARESOLVED | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS $MODLOGAN_TAIL"
			else
				# MODLOGAN_TAIL holds a redirect and a pipe, so the command
				# line has to be parsed by a shell here as well
				cat $LOGDATARESOLVED | sh -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS $MODLOGAN_TAIL"
			fi
		fi
		if [ -x $AWSTATS_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $AWSTATS_CFG.local

LogFile=\"$LOGDATATMP\"
LogFormat=1
DNSLookup=0
DirData=\"$STATSDIR/awstats\"
AllowToUpdateStatsFromBrowser=0
DirCgi=\"http://cgi.jones.dk/cgi-bin\"
DirIcons=\"http://stats.jones.dk/awstats-icon\"
SiteDomain=\"$FQDN_ESC\"
HostAliases=\"$FQDN_ESC\"
Lang=\"dk\"
DirLang=\"/usr/share/awstats/lang\"
DefaultFile=\"index.html\"
SkipHosts=\"\"
SkipFiles=\"\"
ShowLinksOnUrl=1
ShowFlagLinks=0
"\
			> $AWSTATS_CFG
			# honour the override file announced in the generated header,
			# the same way the other tools do
			[ -s $AWSTATS_CFG.local ] && cat $AWSTATS_CFG.local >> $AWSTATS_CFG
			[ -d $STATSDIR/awstats ] || mkdir $STATSDIR/awstats
			[ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
			# temporarily aim LogFile at the resolved data, then switch it back
			awstats_setlog $AWSTATS_CFG $LOGDATARESOLVED
			$AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
			awstats_setlog $AWSTATS_CFG $LOGDATATMP
		fi
		[ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
		gzip -9 $LOGDATARESOLVED
	fi

	# Append the new logdata to LOGDATA, resolved by the first resolver found
	if [ -x $JDRESOLVE_BIN ]; then
		[ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
		logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
	elif [ -x $LOGRESOLVE_BIN ]; then
		[ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
		logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
	else
		[ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
		logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
	fi

	# Only bother the statistics tools when there actually is new data
	if [ -s $LOGDATA ]; then
		if [ -x $ANALOG_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Update analog stats"
			$ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
		fi
		if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
			$ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
			$RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
		fi
		if [ -x $WEBALIZER_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
			cat $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
		fi
		if [ -x $MODLOGAN_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Update ModLogAn stats"
			if [ $ROOT ]; then
				cat $LOGDATA | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS $MODLOGAN_TAIL"
			else
				# see above: MODLOGAN_TAIL needs a shell to take effect
				cat $LOGDATA | sh -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS $MODLOGAN_TAIL"
			fi
		fi
		if [ -x $AWSTATS_BIN ]; then
			[ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
			awstats_setlog $AWSTATS_CFG $LOGDATA
			$AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
			awstats_setlog $AWSTATS_CFG $LOGDATATMP
		fi
		gzip -f9 $LOGDATA
	fi
	if [ $stamp = "init" ]; then
		[ $DEBUG ] && echo "Execute $WEBSITE POST_INIT"
		post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
	fi
	if [ $stamp = "update" ]; then
		[ $DEBUG ] && echo "Execute $WEBSITE POST_UPDATE"
		post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
	fi
done