#!/bin/bash
#
# /usr/local/sbin/localwebresolve
# Copyright 2002 Jonas Smedegaard <dr@jones.dk>
#
# $Id: localwebresolve,v 1.3 2002-04-23 23:50:36 jonas Exp $
#
# Webstats maintenance script
#

# Abort the script as soon as a command fails.
set -o errexit

# Print a short usage summary and terminate the script.
# Outputs: two usage lines on stderr (this is only reached on bad invocation)
# Exits:   always, with status 1
usage() {
	# $0 is quoted so a script path containing whitespace is handled.
	printf 'Usage: %s <YYYY.MM> [<website> [<website>...]]\n' "$(basename -- "$0")" >&2
	printf '  If no website is given, all are attempted\n' >&2
	exit 1
}

# Report a fatal error and terminate the script.
# Arguments: $1 - message describing what went wrong
# Outputs:   "Error: <message>" and "Exiting..." on stderr
# Exits:     always, with status 1
exit1() {
	printf 'Error: %s\n' "$1" >&2
	printf 'Exiting...\n' >&2
	exit 1
}

# The first argument selects the month to process and must have the form
# YYYY.MM.  Only digits are accepted because the value is later embedded in
# find(1) -name patterns and in file paths; anything else shows the usage
# text and exits.
case "$1" in
    [0-9][0-9][0-9][0-9].[0-9][0-9])
	YEARDOTMONTH=$1
	;;
    *)
	usage
	;;
esac
# Drop the month so that only website names remain in "$@".
shift

# Default locations; the local config file sourced further down may replace
# them.
WEBROOT="/var/www"
LOGROOT="/var/log/apache"

# Not referenced by the code visible in this script
# (presumably consumed by the local config or related tools -- TODO confirm).
ROOT="1"
user="www-data"
group="www-data"

# Arguments: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE (YEARDOTMONTH is read from the global variable; callers pass no $4)
# Print the directory that holds a website's log files.
# Arguments: $1 - log root, $2 - web root (not used here), $3 - website name
# Outputs:   "$1/$3" on stdout, verbatim (no word splitting or globbing)
resolveddir() {
	printf '%s\n' "$1/$3"
}
# List the website directories directly below a root directory.
# Arguments: $1 - directory to scan; further arguments are ignored
# Outputs:   one bare directory name per line (leading path removed), sorted;
#            only directories whose own name contains a dot are listed
webdirs() {
	# -name tests the directory's own name, so a dot elsewhere in the root
	# path no longer makes every directory match.  The sed expression removes
	# everything up to the last slash, leaving the name that callers append
	# to the root again.
	find "$1" -mindepth 1 -maxdepth 1 -type d -name '*.*' |
		sed 's!.*/!!' |
		sort
}
#function logfiles()		{ $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
# Write the selected month's daily logs of one website to stdout.
# Globals:   YEARDOTMONTH (read) - month prefix of the log file names
# Arguments: $1 - log root, $2 - web root (not used here), $3 - website name
# Outputs:   the decompressed "<YEARDOTMONTH>.??.gz" files in name order,
#            followed by the plain "<YEARDOTMONTH>.??" files in name order;
#            the merged ".00" / ".00.gz" files are skipped
logcontent() {
	local dir="$1/$3" suffix file
	# First pass handles the compressed logs, second pass the plain ones.
	for suffix in .gz ''; do
		# NUL-delimited names keep paths containing whitespace intact.
		find -L "$dir" -mindepth 1 -maxdepth 1 -type f \
			-name "$YEARDOTMONTH.??$suffix" ! -name "*00$suffix" -print0 |
			sort -z |
			while IFS= read -r -d '' file; do
				if [ -n "$suffix" ]; then
					zcat -- "$file"
				else
					cat -- "$file"
				fi
			done
	done
}
# Delete the selected month's daily logs of one website.
# Globals:   YEARDOTMONTH (read) - month prefix of the log file names
# Arguments: $1 - log root, $2 - web root (not used here), $3 - website name
# Effects:   removes "<YEARDOTMONTH>.??.gz" and "<YEARDOTMONTH>.??" files
#            directly inside $1/$3; the merged ".00" / ".00.gz" files are kept
logremove() {
	local dir="$1/$3"
	# One find call covers both name patterns and hands the matches straight
	# to rm, so paths containing whitespace are never word-split.
	find -L "$dir" -mindepth 1 -maxdepth 1 -type f \
		\( -name "$YEARDOTMONTH.??.gz" ! -name '*00.gz' \
		-o -name "$YEARDOTMONTH.??" ! -name '*00' \) \
		-exec rm -f -- {} +
}

# Site-local settings: this file is sourced after the defaults and helper
# functions defined above, so it can replace any of them.
LOCALCONFIG="/etc/local/webresolve.conf"
if ! . "$LOCALCONFIG"; then
	exit1 "Unable to read local config file $LOCALCONFIG"
fi

# Fixed resolver settings; they are assigned after the local config has been
# sourced, so the config cannot change them.
LOGRESOLVE_BIN='/usr/sbin/logresolve'
JDRESOLVE_BIN='/usr/bin/jdresolve'
JDRESOLVE_DB='/var/cache/jdresolve/hosts.db'
# Database-backed lookups are switched off because they currently produce no
# output.  Earlier option sets, kept for reference:
#JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
#JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
JDRESOLVE_OPTIONS='-r'
JDRESOLVE_MERGE_OPTIONS="--mergedb --database=${JDRESOLVE_DB}"

# Website-independent checks
# The expansions are quoted so that an empty WEBROOT or LOGROOT fails the
# check: unquoted, "test -d" would be left with a single argument, which test
# treats as a non-empty string and reports as true.
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"
# Only touch the hosts database when the jdresolve binary is executable.
if [ -x "$JDRESOLVE_BIN" ]; then
	touch "$JDRESOLVE_DB" || exit1 "Couldn't touch JDRESOLVE_DB"
fi

# Websites to process: the remaining command-line arguments or, when none
# were given, every site reported by webdirs.
# (In an assignment bash joins "$@" into one space-separated string.)
WEBSITES=$@
if [ -z "$WEBSITES" ]; then
	# Quoted so that roots containing whitespace reach webdirs intact.
	WEBSITES=$(webdirs "$LOGROOT" "$WEBROOT" "$WEBSITE")
fi

# Merge, DNS-resolve and archive the selected month's logs, one website at a
# time.  For each site the daily logs are concatenated into
# "<RESOLVEDDIR>/<YEARDOTMONTH>.00", that file is gzipped when non-empty, and
# the daily logs are then removed.
# $WEBSITES is deliberately unquoted: it is a whitespace-separated list.
for WEBSITE in $WEBSITES; do
	RESOLVEDDIR=$(resolveddir "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve RESOLVEDDIR."
# FIXME	test -d $RESOLVEDDIR/.. || exit1 "Directory above RESOLVEDDIR doesn't exist."

	# Derive this site's jdresolve flags from the unmodified base options.
	# Assigning back to JDRESOLVE_OPTIONS would append another flag on every
	# iteration, and the non-debug branch must extend the options rather than
	# replace them with the database path.
	# (-p / -n: presumably progress display / no statistics -- see jdresolve(1).)
	if [ -n "$DEBUG" ]; then
		echo "Resolving logfiles $YEARDOTMONTH for $WEBSITE in $RESOLVEDDIR:"
		jdresolve_opts="$JDRESOLVE_OPTIONS -p"
	else
		jdresolve_opts="$JDRESOLVE_OPTIONS -n"
	fi

	# Refuse to overwrite a month that has already been merged.
	LOGDATA="$RESOLVEDDIR/$YEARDOTMONTH.00"
	if [ -e "$LOGDATA" ]; then
		exit1 "File $LOGDATA exists already."
	fi
	if [ -e "$LOGDATA.gz" ]; then
		exit1 "File $LOGDATA exists (compressed) already."
	fi
	touch "$LOGDATA" || exit1 "Couldn't touch LOGDATA files."

	# Prefer jdresolve, fall back to logresolve, else merge unresolved.
	if [ -x "$JDRESOLVE_BIN" ]; then
		if [ -n "$DEBUG" ]; then
			echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
		fi
# DB access doesn't work currently (no output...)
#		logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
#		jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
		# $jdresolve_opts is intentionally unquoted so it splits into
		# separate command-line options.
		# shellcheck disable=SC2086
		logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$JDRESOLVE_BIN" $jdresolve_opts - >> "$LOGDATA"
	elif [ -x "$LOGRESOLVE_BIN" ]; then
		if [ -n "$DEBUG" ]; then
			echo "$WEBSITE: DNS-resolve new logdata using logresolve"
		fi
		logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$LOGRESOLVE_BIN" >> "$LOGDATA"
	else
		if [ -n "$DEBUG" ]; then
			echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
		fi
		logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATA"
	fi

	# Compress the merged file unless it ended up empty.
	if [ -s "$LOGDATA" ]; then
		gzip -f9 "$LOGDATA"
	fi
	# NOTE(review): the daily logs are removed even when the merged file is
	# empty -- confirm that this is intended.
	logremove "$LOGROOT" "$WEBROOT" "$WEBSITE"
done