#!/bin/bash
#
# /usr/local/sbin/localwebsearch
# Copyright 2001-2002 Jonas Smedegaard <dr@jones.dk>
#
# $Id: localwebsearch,v 1.5 2004-01-20 22:39:19 jonas Exp $
#
# Web search engine maintenance script
#

# Abort the script as soon as an unguarded command fails (errexit).
set -o errexit

# Print a short usage summary and terminate the script.
# Arguments: none
# Outputs:   the usage text, on stderr (it is only shown for bad invocations)
# Exits with status 1; never returns to the caller.
usage() {
	# ${0##*/} strips the directory part without word-splitting $0, so a
	# script path containing whitespace is still reported correctly.
	printf '%s\n' \
		"Usage: ${0##*/} initprep|init|update|ignore <website> [<website>...]" \
		"  If no website is given, all are attempted" \
		"  Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/" >&2
	exit 1
}

# Report a fatal error and terminate the script.
# Arguments: $1 - human-readable description of what went wrong
# Outputs:   "Error: <description>" followed by "Exiting...", on stderr
# Exits with status 1; never returns to the caller.
exit1() {
	printf 'Error: %s\nExiting...\n' "$1" >&2
	exit 1
}

# Select the operating mode ("stamp").
# When the script is started from one of the cron directories (typically
# through a symlink) the mode follows from that directory:
#   cron.daily -> update, cron.weekly -> initprep, cron.monthly -> ignore
# Otherwise the first command-line argument is consumed as the mode; a
# missing argument makes "shift" fail, which shows the usage text.
# $0 is quoted so that a path containing whitespace is handled as one word.
case "$(dirname -- "$0")" in
    /etc/cron.daily)
	stamp=update
	;;
    /etc/cron.weekly)
	stamp=initprep
	;;
    /etc/cron.monthly)
	stamp=ignore
	;;
    *)
	stamp=${1:-}
	shift || usage
	;;
esac

# Refuse anything but the four known modes.
case "$stamp" in
    initprep|init|update|ignore)
	;;
    *)
	usage
	;;
esac

# "ignore" is a deliberate no-op: leave successfully without touching anything.
if [ "$stamp" = "ignore" ]; then
	if [ -n "${DEBUG:-}" ]; then
		echo "Asked to ignore - exiting silently..."
	fi
	exit 0
fi

# Default locations of the web tree and of the web server logs.
WEBROOT="/var/www"
LOGROOT="/var/log/apache"

# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
# Print the search directory of one website.
# Arguments: $1 - log root (not used here), $2 - web root, $3 - website
# Outputs:   <web root>/VIRTUAL/search.<domain>/www/<website> on stdout,
#            where <domain> is whatever dnsdomainname prints
searchdir() {
	# The path is built inside double quotes so that whitespace or glob
	# characters in the arguments reach the caller unchanged.
	printf '%s\n' "$2/VIRTUAL/search.$(dnsdomainname)/www/$3"
}
# List the websites that have an htdig configuration, one name per line:
# every regular *.conf file found below /etc/htdig, htdig.conf excepted,
# printed without its directory and without the .conf suffix.
# Any arguments passed in are not looked at.
webdirs() {
	find /etc/htdig \
		-type f \
		-name '*.conf' \
		! -name 'htdig.conf' \
		-exec basename '{}' .conf \;
}
#function host()			{ cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
#function domain()		{ cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
# Print the path of the htdig configuration file of one website.
# Arguments: $1 - log root (not used here), $2 - web root (not used here),
#            $3 - website
# Outputs:   /etc/htdig/<website>.conf on stdout
htdig_cfg() {
	# Quoted so the website name is emitted verbatim rather than being
	# word-split or glob-expanded.
	printf '%s\n' "/etc/htdig/$3.conf"
}
# Hook points run before and after the indexing of each website in "init"
# and "update" mode.  They are called with $1=LOGROOT, $2=WEBROOT,
# $3=WEBSITE.  These defaults do nothing and report success.
pre_init()    { :; }
post_init()   { :; }
pre_update()  { :; }
post_update() { :; }

# Site-specific settings: the defaults defined above may be redefined in
# this file.  Failing to read it is fatal.
LOCALCONFIG="/etc/local/websearch.conf"
if ! . "$LOCALCONFIG"; then
	exit1 "Unable to read local config file $LOCALCONFIG"
fi

# Fixed settings, assigned after the local configuration has been read.
# HTDIG_BIN is only tested for being executable; HTDIG_REAL_BIN is the
# program that actually gets run.
HTDIG_BIN=/usr/bin/rundig
HTDIG_REAL_BIN=/usr/local/sbin/localrundig # TODO: Convince Debian maintainer to change official rundig

#function awstats_setlog()	{ sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }

# Website-independent checks
# The expansions are quoted on purpose: an empty value must fail the
# directory test instead of vanishing and leaving a bare "test -d", which
# evaluates to true.
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"

# Websites to work on: the ones named on the command line or, when none
# were given, every site reported by webdirs.
WEBSITES="$@"
[ -n "$WEBSITES" ] || WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)

# Process every selected website in turn.  Per site and mode:
#   init     - pre_init hook, index from scratch (-i), post_init hook
#   update   - pre_update hook, index, post_update hook; if "initprep" left
#              a full-refresh marker for the site, the index is rebuilt
#              from scratch (-i) and the marker is removed afterwards
#   initprep - only create the full-refresh marker, then go to the next site
# WEBSITES is expanded unquoted on purpose: it is a whitespace-separated list.
for WEBSITE in $WEBSITES; do
	SEARCHDIR=$(searchdir "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve SEARCHDIR."
# FIXME	test -d $SEARCHDIR/.. || exit1 "Directory above SEARCHDIR doesn't exist."

	HTDIG_CFG=$(htdig_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve HTDIG_CFG."

	# Marker file through which "initprep" requests a full re-index on the
	# next indexing run of this site.
	refresh_marker=/etc/htdig/full_refresh_$WEBSITE

	if [ "$stamp" = "init" ]; then
		[ -n "${DEBUG:-}" ] && echo "Execute $WEBSITE PRE_INIT"
		pre_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_INIT."
	fi
	if [ "$stamp" = "update" ]; then
		[ -n "${DEBUG:-}" ] && echo "Execute $WEBSITE PRE_UPDATE"
		pre_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_UPDATE."
	fi

#	HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
#	DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."

#	HTDIG_OPTIONS="-a" #TODO: This seems to always init currently
	# Options are collected in a plain string and word-split when the
	# indexer is invoked.
	HTDIG_OPTIONS=""
	# The site named "htdig" runs without an explicit -c option.
	if [ "$WEBSITE" != "htdig" ]; then
		HTDIG_OPTIONS="$HTDIG_OPTIONS -c $HTDIG_CFG"
	fi

	if [ -n "${DEBUG:-}" ]; then
		echo "Indexing search for $WEBSITE in $SEARCHDIR:"
		HTDIG_OPTIONS="$HTDIG_OPTIONS -v -s"
	fi

	# A full re-index is due on "init", and on an "update" for which a
	# full-refresh marker exists.  Both get -i; previously the marker was
	# merely deleted without changing how the indexer was run.
	full_refresh=""
	if [ "$stamp" = "init" ]; then
		full_refresh=yes
	elif [ "$stamp" = "update" ] && [ -f "$refresh_marker" ]; then
		full_refresh=yes
	fi
	if [ -n "$full_refresh" ]; then
		HTDIG_OPTIONS="$HTDIG_OPTIONS -i"
	fi

	if [ "$stamp" = "initprep" ]; then
		[ -n "${DEBUG:-}" ] && echo "Mark next run as a full-scale, and move on to next site..."
		touch "$refresh_marker"
		continue
	fi

# TODO
	# NOTE: "initXXX" is not among the modes accepted earlier in this
	# script, so this branch does not run at present.  HOST and DOMAIN are
	# only assigned in the commented-out lines above.
	if [ "$stamp" = "initXXX" ]; then
		[ -n "${DEBUG:-}" ] && echo "$WEBSITE: Purge SEARCHDIR"
		# ${SEARCHDIR:?} aborts instead of running rm -rf on an empty path.
		rm -rf -- "${SEARCHDIR:?}"
		mkdir "$SEARCHDIR"
		if [ -x "$HTDIG_BIN" ]; then
			[ -n "${DEBUG:-}" ] && echo "$WEBSITE: Create/update htdig config"
			echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $HTDIG_CFG.local

HOSTNAME	$HOST.$DOMAIN
HOSTURL		http://$HOST.$DOMAIN/
BASEURL		http://$HOST.$DOMAIN
LANGUAGE	DANISH
"\
			> "$HTDIG_CFG"
			[ -s "$HTDIG_CFG.local" ] && cat "$HTDIG_CFG.local" >> "$HTDIG_CFG"
			mkdir "$SEARCHDIR/htdig"
		fi
	fi

#	if ! lockfile-create /var/run/localwebsearch_$WEBSITE; then
#		# Another htdig indexing cronjob is already running
#		[ $DEBUG ] && echo "Another $WEBSITE indexing is already running. Ignoring silently..."
#		continue
#	fi

#	lockfile-touch /var/run/localwebsearch_$WEBSITE &
#	# Save the PID of the lockfile-touch process
#	BADGER="$!"

	if [ "$stamp" = "init" ] || [ "$stamp" = "update" ]; then
		# Indexing is skipped unless both programs are executable.
		if [ -x "$HTDIG_BIN" ] && [ -x "$HTDIG_REAL_BIN" ]; then
			export TMPDIR=/tmp
			[ -n "${DEBUG:-}" ] && echo "$WEBSITE: Update htdig search"
			# HTDIG_OPTIONS is deliberately unquoted so that it splits
			# into separate arguments.
			# shellcheck disable=SC2086
			"$HTDIG_REAL_BIN" $HTDIG_OPTIONS
			# Reached only if indexing succeeded (errexit), so a failed
			# full run leaves the marker in place for the next attempt.
			if [ -n "$full_refresh" ]; then
				rm -f "$refresh_marker"
			fi
		fi
	fi

#	kill "${BADGER}"
#	lockfile-remove /var/run/localwebsearch_$WEBSITE

	if [ "$stamp" = "init" ]; then
		[ -n "${DEBUG:-}" ] && echo "Execute $WEBSITE POST_INIT"
		post_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_INIT."
	fi
	if [ "$stamp" = "update" ]; then
		[ -n "${DEBUG:-}" ] && echo "Execute $WEBSITE POST_UPDATE"
		post_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_UPDATE."
	fi

done