#!/bin/bash
#
# /usr/local/sbin/localwebsearch
# Copyright 2001-2002 Jonas Smedegaard <dr@jones.dk>
#
# $Id: localwebsearch,v 1.5 2004-01-20 22:39:19 jonas Exp $
#
# Web search engine maintenance script
#
# Usage: localwebsearch initprep|init|update|ignore [<website>...]
#
# Modes:
#   initprep  touch /etc/htdig/full_refresh_<website> for each site and do
#             nothing else
#   init      run pre_init, the indexer with -i, then post_init
#   update    run pre_update, the indexer, then post_update
#   ignore    exit successfully without doing anything
#
# When invoked through /etc/cron.daily, /etc/cron.weekly or
# /etc/cron.monthly the mode is derived from the directory name
# (update, initprep and ignore respectively) and all arguments are taken
# as website names.
#
# Environment:
#   DEBUG  when non-empty, progress messages are printed and the indexer
#          is run with "-v -s"
#
# WEBROOT, LOGROOT and the hook/helper functions defined before the local
# config is sourced can be overridden from /etc/local/websearch.conf.
#

# halt on errors (NB! this is a bashism...)
set -e

# Print the usage text on stderr and exit with status 1.
usage() {
  {
    echo "Usage: $(basename "$0") initprep|init|update|ignore <website> [<website>...]"
    echo " If no website is given, all are attempted"
    echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  } >&2
  exit 1
}

# Print an error message on stderr and exit with status 1.
# Arguments: $1 - message text
exit1() {
  echo "Error: $1" >&2
  echo "Exiting..." >&2
  exit 1
}

# Print a progress message when DEBUG is non-empty.
# Always returns 0 so that it is safe to call under "set -e".
# Arguments: $1 - message text
_lws_debug() {
  if [ -n "$DEBUG" ]; then
    echo "$1"
  fi
}

# automagically configure when run from cron dirs
case "$(dirname "$0")" in
  /etc/cron.daily)
    stamp=update
    ;;
  /etc/cron.weekly)
    stamp=initprep
    ;;
  /etc/cron.monthly)
    stamp=ignore
    ;;
  *)
    # Not run from a cron directory: the first argument is the mode.
    stamp=$1
    shift || usage
    ;;
esac

case "$stamp" in
  initprep|init|update|ignore)
    ;;
  *)
    usage
    ;;
esac

if [ "$stamp" = "ignore" ]; then
  _lws_debug "Asked to ignore - exiting silently..."
  exit 0
fi

WEBROOT=/var/www
LOGROOT=/var/log/apache

# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE

# Print the search directory of a website.
searchdir() {
  echo "$2/VIRTUAL/search.$(dnsdomainname)/www/$3"
}

# Print the names of all configured websites: one name per
# /etc/htdig/*.conf file (extension stripped), htdig.conf excluded.
webdirs() {
  find /etc/htdig -type f -name '*.conf' ! -name 'htdig.conf' \
    -exec basename '{}' .conf \;
}

#function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
#function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }

# Print the path of the htdig config file of a website.
htdig_cfg() {
  echo "/etc/htdig/$3.conf"
}

# Hooks run around indexing; no-ops unless overridden by the local config.
pre_init() { true; }
post_init() { true; }
pre_update() { true; }
post_update() { true; }

# The above can be overridden
LOCALCONFIG=/etc/local/websearch.conf
. "$LOCALCONFIG" || exit1 "Unable to read local config file $LOCALCONFIG"

# variables and functions too boring to be configurable
HTDIG_BIN="/usr/bin/rundig"
HTDIG_REAL_BIN="/usr/local/sbin/localrundig" #TODO: Convince Debian maintainer to change official rundig
#function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }

# Website-independent checks
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"

# Index searches for the websites given as arguments, or for all
# configured sites when none are given
WEBSITES="$*"
if [ -z "$WEBSITES" ]; then
  # No website is known yet at this point, so only LOGROOT and WEBROOT
  # are passed.
  WEBSITES=$(webdirs "$LOGROOT" "$WEBROOT")
fi

# $WEBSITES is deliberately left unquoted: it is a whitespace-separated
# list of site names.
for WEBSITE in $WEBSITES; do
  SEARCHDIR=$(searchdir "$LOGROOT" "$WEBROOT" "$WEBSITE") \
    || exit1 "Unable to resolve SEARCHDIR."
  # FIXME
  test -d "$SEARCHDIR/.." || exit1 "Directory above SEARCHDIR doesn't exist."
  HTDIG_CFG=$(htdig_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") \
    || exit1 "Unable to resolve HTDIG_CFG."

  # Marker file requesting that the next run be treated as a full refresh.
  refresh_marker="/etc/htdig/full_refresh_$WEBSITE"

  if [ "$stamp" = "init" ]; then
    _lws_debug "Execute $WEBSITE PRE_INIT"
    pre_init "$LOGROOT" "$WEBROOT" "$WEBSITE" \
      || exit1 "Error executing PRE_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    _lws_debug "Execute $WEBSITE PRE_UPDATE"
    pre_update "$LOGROOT" "$WEBROOT" "$WEBSITE" \
      || exit1 "Error executing PRE_UPDATE."
  fi

#  HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
#  DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."

#  HTDIG_OPTIONS="-a" #TODO: This seems to always init currently
  # Build the indexer arguments as an array so that each one is passed
  # as exactly one word.
  htdig_args=()
  if [ "$WEBSITE" != "htdig" ]; then
    # The site named "htdig" runs without an explicit -c option.
    htdig_args+=(-c "$HTDIG_CFG")
  fi
  if [ -n "$DEBUG" ]; then
    echo "Indexing search for $WEBSITE in $SEARCHDIR:"
    htdig_args+=(-v -s)
  fi
  if [ "$stamp" = "init" ]; then
    htdig_args+=(-i)
  fi
  # String form kept in case overriding hooks read it -- TODO confirm
  # whether any local config depends on HTDIG_OPTIONS.
  HTDIG_OPTIONS="${htdig_args[*]}"

  if [ "$stamp" = "initprep" ]; then
    _lws_debug "Mark next run as a full-scale, and move on to next site..."
    touch "$refresh_marker"
    continue
  fi

  # TODO
  # NOTE(review): "initXXX" is rejected by the mode check at the top, so
  # this branch never runs; it also uses HOST and DOMAIN, whose
  # assignments are commented out above.
  if [ "$stamp" = "initXXX" ]; then
    _lws_debug "$WEBSITE: Purge SEARCHDIR"
    # ${SEARCHDIR:?} aborts instead of running rm -rf on an empty path.
    rm -rf -- "${SEARCHDIR:?}"
    mkdir "$SEARCHDIR"

    if [ -x "$HTDIG_BIN" ]; then
      _lws_debug "$WEBSITE: Create/update htdig config"
      echo "\
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $HTDIG_CFG.local
HOSTNAME $HOST.$DOMAIN
HOSTURL http://$HOST.$DOMAIN/
BASEURL http://$HOST.$DOMAIN
LANGUAGE DANISH
" > "$HTDIG_CFG"
      if [ -s "$HTDIG_CFG.local" ]; then
        cat "$HTDIG_CFG.local" >> "$HTDIG_CFG"
      fi
      mkdir "$SEARCHDIR/htdig"
    fi
  fi

#  if ! lockfile-create /var/run/localwebsearch_$WEBSITE; then
#    # Another htdig indexing cronjob is already running
#    [ $DEBUG ] && echo "Another $WEBSITE indexing is already running. Ignoring silently..."
#    continue
#  fi
#  lockfile-touch /var/run/localwebsearch_$WEBSITE &
#  # Save the PID of the lockfile-touch process
#  BADGER="$!"

  if [[ "$stamp" == "init" || "$stamp" == "update" ]]; then
    # Indexing is silently skipped unless both binaries are executable.
    if [[ -x "$HTDIG_BIN" && -x "$HTDIG_REAL_BIN" ]]; then
      export TMPDIR=/tmp
      _lws_debug "$WEBSITE: Update htdig search"

      # Decide before the run whether the marker is to be cleared
      # afterwards, so the decision does not depend on what the indexer
      # does to the marker file.
      clear_marker=
      if [[ "$stamp" == "init" || -f "$refresh_marker" ]]; then
        clear_marker=yes
      fi

      # NOTE(review): a pending full refresh does not add -i when the
      # mode is "update"; presumably the indexer checks the marker
      # itself -- confirm against localrundig.
      "$HTDIG_REAL_BIN" "${htdig_args[@]}"

      if [ -n "$clear_marker" ]; then
        rm -f "$refresh_marker"
      fi
    fi
  fi

#  kill "${BADGER}"
#  lockfile-remove /var/run/localwebsearch_$WEBSITE

  if [ "$stamp" = "init" ]; then
    _lws_debug "Execute $WEBSITE POST_INIT"
    post_init "$LOGROOT" "$WEBROOT" "$WEBSITE" \
      || exit1 "Error executing POST_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    _lws_debug "Execute $WEBSITE POST_UPDATE"
    post_update "$LOGROOT" "$WEBROOT" "$WEBSITE" \
      || exit1 "Error executing POST_UPDATE."
  fi
done