#!/bin/bash
# /etc/cron.daily/localwebsearch: Websearch maintenance script
# Written by Jonas Smedegaard
#
# Provenance: added as new file "localwebsearch" (mode 100755) by commit
# 368a3a19ed6f6026cd3817e8f714f71b11601167 (Jonas Smedegaard,
# Mon, 21 Jan 2002 04:06:50 +0000): "Changes to localrundig and begin work
# on localwebsearch (works from cron only)."
#
# Modes (first argument, or derived from the directory the script is run from):
#   init           flag a full htdig refresh (if rundig is installed), then exit
#   ignore         exit without doing anything
#   update, prelogrotate, postlogrotate
#                  take the cron lock and continue with the maintenance run
# Run from /etc/cron.daily the mode is "update"; run from /etc/cron.weekly or
# /etc/cron.monthly it is "ignore".
# Environment: DEBUG - when non-empty, progress messages are printed.

# halt on errors
set -e

# Print the usage text and terminate with status 1.
# Written to stderr so it never ends up in captured command output.
usage() {
  {
    echo "Usage: $(basename "$0") init|update|prelogrotate|postlogrotate|ignore [...]"
    echo " If no website is given, all are attempted"
    echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  } >&2
  exit 1
}

# Report a fatal error and terminate with status 1.
# Arguments: $1 - error description
# The message goes to stderr: this function is also called from inside
# command substitutions, where anything on stdout would be swallowed into
# the variable being assigned instead of reaching the operator.
exit1() {
  {
    echo "Error: $1"
    echo "Exiting..."
  } >&2
  exit 1
}

# automagically configure when run from cron dirs
case "$(dirname "$0")" in
  /etc/cron.daily)
    stamp=update
    ;;
  /etc/cron.weekly | /etc/cron.monthly)
    stamp=ignore
    ;;
  *)
    # Run by hand: the mode is the mandatory first argument.
    [ "$#" -ge 1 ] || usage
    stamp=$1
    shift
    ;;
esac

case "$stamp" in
  init)
    # Only leave the "full refresh" flag file behind; no indexing happens here.
    if [ -f /usr/bin/rundig ]; then
      touch /etc/htdig/local_full_refresh
    fi
    exit 0
    ;;
  update | prelogrotate | postlogrotate | ignore)
    ;;
  *)
    usage
    ;;
esac

if [ "$stamp" = "ignore" ]; then
  if [ -n "${DEBUG:-}" ]; then
    echo "Asked to ignore - exiting silently..."
  fi
  exit 0
fi

if ! lockfile-create /var/run/localwebsearch.cron; then
  # Another htdig indexing cronjob is already running
  exit 0
fi

# Keep the lock fresh in the background for as long as this script runs.
lockfile-touch /var/run/localwebsearch.cron &
# Save the PID of the lockfile-touch process
BADGER="$!"
# Stop the background lockfile-touch helper (PID in BADGER) and remove the
# cron lock.  Installed as an EXIT trap so that the lock is released even
# when `set -e` aborts the run (for example because localrundig failed);
# without it the helper keeps running and the lock stays in place.
# Returns: exit status of lockfile-remove.
release_lock() {
  trap - EXIT
  # The helper may already be gone; that must not prevent removing the lock.
  kill "${BADGER}" 2>/dev/null || true
  lockfile-remove /var/run/localwebsearch.cron
}
trap release_lock EXIT

if [ -f /usr/bin/rundig ]; then
  # localrundig is invoked once per htdig config file found (htdig.conf
  # itself excluded).  The config path is not passed on to localrundig; it
  # only determines how many times it runs.  The list is read on fd 3 so
  # that localrundig keeps the script's own stdin.
  while IFS= read -r -u 3 cfg; do
    if [ -f /etc/htdig/local_full_refresh ]; then
      # A full refresh was requested via "init": reindex from scratch once,
      # then clear the flag.
      /usr/local/bin/localrundig -i -a
      rm /etc/htdig/local_full_refresh
    else
      /usr/local/bin/localrundig -a
    fi
  done 3< <(find /etc/htdig -type f -name '*.conf' ! -name 'htdig.conf')
fi

release_lock

exit $?

# ---------------------------------------------------------------------------
# NOTE: everything below is unreachable as long as the unconditional exit
# above stays in place.  It is the web-statistics part of the script.
# ---------------------------------------------------------------------------

WEBROOT=/var/www
LOGROOT=/var/log/apache

# All helper functions below take: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE

# Print the directory the statistics for website $3 are written to.
statsdir() { echo "$2/VIRTUAL/stats.$(dnsdomainname)/www/$3"; }

# Print the names of all directories directly below $1 whose path contains a
# dot, one per line, with the leading directory part stripped.
webdirs() {
  find "$1" -mindepth 1 -maxdepth 1 -type d | grep '\.*\.' | sed 's!^.*/!!'
}

#function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }

# Write the content of the logs of website $3 named ????.??.00[.gz] to
# stdout: gzipped files first, then plain ones.
logcontentresolved() {
  local file
  while IFS= read -r file; do
    zcat "$file"
  done < <(find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00.gz' -type f -follow)
  while IFS= read -r file; do
    cat "$file"
  done < <(find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00' -type f -follow)
}

# Write the content of the logs of website $3 named ????.??.??[.gz], except
# those ending in 00[.gz], to stdout: gzipped files first, then plain ones.
logcontent() {
  local file
  while IFS= read -r file; do
    zcat "$file"
  done < <(find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??.gz' ! -name '*00.gz' -type f -follow)
  while IFS= read -r file; do
    cat "$file"
  done < <(find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??' ! -name '*00' -type f -follow)
}

# Print the host name / domain name stored for virtual host $3.
host() { cat "$2/VIRTUAL/$3/hostname" || exit1 "Unable to get hostname for virtual host."; }
domain() { cat "$2/VIRTUAL/$3/domainname" || exit1 "Unable to get domainname for virtual host."; }

# Print the per-site configuration file path of each statistics tool.
analog_cfg() { echo "/etc/analog_$3.conf"; }
rmagic_cfg() { echo "/etc/rmagic/rmagic_$3.conf"; }
webalizer_cfg() { echo "/etc/webalizer_$3.conf"; }

# Hooks run before/after processing each site; no-ops by default.
pre_init() { true; }
post_init() { true; }
pre_update() { true; }
post_update() { true; }

# The above can be overridden in /etc/local/www

. /etc/local/www || exit1 "Unable to read prefs file"

# variables and functions too boring to be configurable
JDRESOLVE_BIN="/usr/bin/jdresolve"
JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
#JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
# DB access doesn't work currently (no output...)
#JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
JDRESOLVE_OPTIONS="-r"
JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
LOGRESOLVE_BIN="/usr/sbin/logresolve"
ANALOG_BIN="/usr/bin/analog"
RMAGIC_BIN="/usr/bin/rmagic"
WEBALIZER_BIN="/usr/bin/webalizer"
AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"

# Point the LogFile= line of AWStats config file $1 at log file $2.
# The file is rewritten via "$1.tmp" and then moved into place.
awstats_setlog() {
  sed -e "s!^\(LogFile=\).*\$!\\1$2!" "$1" > "$1.tmp"
  mv "$1.tmp" "$1"
}

# Print the arguments only when DEBUG is set to a non-empty value.
# Always returns 0, so it is safe to call under `set -e`.
_dbg() {
  if [ -n "${DEBUG:-}" ]; then
    echo "$@"
  fi
}

# Website-independent checks
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"
if [ -x "$JDRESOLVE_BIN" ]; then
  touch "$JDRESOLVE_DB" || exit1 "Couldn't touch JDRESOLVE_DB"
fi

# Generate stats for the websites given as arguments, or for all sites found
# below LOGROOT when none are given.
WEBSITES="$*"
if [ "$WEBSITES" = "" ]; then
  WEBSITES=$(webdirs "$LOGROOT" "$WEBROOT")
fi

# $WEBSITES is split on whitespace on purpose: it is a list of site names.
for WEBSITE in $WEBSITES; do
  STATSDIR=$(statsdir "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve STATSDIR."
  # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."

  ANALOG_CFG=$(analog_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve ANALOG_CFG."
  RMAGIC_CFG=$(rmagic_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve RMAGIC_CFG."
  WEBALIZER_CFG=$(webalizer_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve WEBALIZER_CFG."
  # Path of the per-site AWStats config that is generated and updated below.
  AWSTATS_CFG="/etc/awstats/awstats.$WEBSITE.conf"

  if [ "$stamp" = "init" ]; then
    pre_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    pre_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_UPDATE."
  fi

  HOST=$(host "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get hostname for virtual host."
  DOMAIN=$(domain "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get domainname for virtual host."

  # The *_OPTIONS variables are option strings; they are expanded unquoted
  # further down so that they split into separate arguments.
  ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
  RMAGIC_OPTIONS=""
  WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  AWSTATS_OPTIONS="-update"
  # Host name with every "." turned into "\." - needed for awstats config
  FQDN_ESC=$(printf '%s\n' "$HOST.$DOMAIN" | sed -e 's/\./\\./g')

  # jdresolve options are derived per site from the base options, so that
  # nothing accumulates from one site to the next.
  if [ -n "${DEBUG:-}" ]; then
    echo "Making stats for $WEBSITE in $STATSDIR:"
    SITE_JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
    ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
    AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  else
    SITE_JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -n"
    ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
    RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  fi

  if [ "$stamp" = "init" ]; then
    _dbg "$WEBSITE: Purge STATSDIR"
    # ${STATSDIR:?} aborts instead of running "rm -rf" on an empty path.
    rm -rf -- "${STATSDIR:?}"
    mkdir "$STATSDIR"
  fi

  LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
  LOGDATA="$STATSDIR/rawlog_new.txt"
  LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
  touch "$LOGDATARESOLVED" "$LOGDATA" "$LOGDATATMP" || exit1 "Couldn't touch LOGDATA files."

  if [ "$stamp" = "init" ]; then
    logcontentresolved "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATARESOLVED"

    if [ -x "$ANALOG_BIN" ]; then
      _dbg "$WEBSITE: Create/update analog config"
      cat > "$ANALOG_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $ANALOG_CFG.local

HOSTNAME $HOST.$DOMAIN
HOSTURL http://$HOST.$DOMAIN/
BASEURL http://$HOST.$DOMAIN
LANGUAGE DANISH

EOF
      if [ -s "$ANALOG_CFG.local" ]; then
        cat "$ANALOG_CFG.local" >> "$ANALOG_CFG"
      fi
      mkdir "$STATSDIR/analog"
      _dbg "$WEBSITE: Create initial analog stats"
      "$ANALOG_BIN" $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
    fi

    if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
      _dbg "$WEBSITE: Create/update Report Magic config"
      cat > "$RMAGIC_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $RMAGIC_CFG.local

[statistics]
File_In = $STATSDIR/rmagic/report.dat
Frame_File_Out = $STATSDIR/rmagic/index.html
Language = en

[reports]
File_Out = $STATSDIR/rmagic/

[QUICK]
Rows = ALL

[navigation]
File_Out = navfile.html

EOF
      # Local overrides are pulled in through an extra command line option.
      if [ -s "$RMAGIC_CFG.local" ]; then
        RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
      fi
      mkdir "$STATSDIR/rmagic"
      _dbg "$WEBSITE: Create initial Report Magic stats"
      "$ANALOG_BIN" $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
      "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
    fi

    if [ -x "$WEBALIZER_BIN" ]; then
      _dbg "$WEBSITE: Create/update Webalizer config"
      cat > "$WEBALIZER_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $WEBALIZER_CFG.local

HostName $HOST.$DOMAIN
HideSite *$DOMAIN
HideReferrer $DOMAIN/

EOF
      if [ -s "$WEBALIZER_CFG.local" ]; then
        cat "$WEBALIZER_CFG.local" >> "$WEBALIZER_CFG"
      fi
      mkdir "$STATSDIR/webalizer"
      _dbg "$WEBSITE: Create initial Webalizer stats"
      # A failing initial webalizer run is tolerated and does not abort the script.
      "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 - < "$LOGDATARESOLVED" || true
    fi

    if [ -x "$AWSTATS_BIN" ]; then
      _dbg "$WEBSITE: Create/update AWStats config"
      cat > "$AWSTATS_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $AWSTATS_CFG.local

LogFile="$LOGDATATMP"
LogFormat=4
DNSLookup=0
DirData="$STATSDIR/awstats"
AllowToUpdateStatsFromBrowser=0
DirCgi="http://cgi.jones.dk/cgi-bin"
DirIcons="http://stats.jones.dk/awstats-icon"
SiteDomain="$FQDN_ESC"
HostAliases="$FQDN_ESC"
Lang="dk"
DirLang="/usr/share/awstats/lang"
DefaultFile="index.html"
SkipHosts=""
SkipFiles=""
ShowLinksOnUrl=1
ShowFlagLinks=0

EOF
      if [ -s "$AWSTATS_CFG.local" ]; then
        cat "$AWSTATS_CFG.local" >> "$AWSTATS_CFG"
      fi
      mkdir "$STATSDIR/awstats"
      _dbg "$WEBSITE: Create initial AWStats stats"
      # Point the config at the resolved log for this run, then back at the
      # incoming log.
      awstats_setlog "$AWSTATS_CFG" "$LOGDATARESOLVED"
      "$AWSTATS_BIN" -config="$WEBSITE" $AWSTATS_OPTIONS -output > "$STATSDIR/awstats/index.html"
      awstats_setlog "$AWSTATS_CFG" "$LOGDATATMP"
    fi

    _dbg "$WEBSITE: Compress DNS-resolved logdata"
    gzip -9 "$LOGDATARESOLVED"
  fi

  # Collect the new log data, DNS-resolved with whichever resolver is installed.
  if [ -x "$JDRESOLVE_BIN" ]; then
    _dbg "$WEBSITE: DNS-resolve new logdata using jdresolve"
    # DB access doesn't work currently (no output...)
    # logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
    # jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$JDRESOLVE_BIN" $SITE_JDRESOLVE_OPTIONS - >> "$LOGDATA"
  elif [ -x "$LOGRESOLVE_BIN" ]; then
    _dbg "$WEBSITE: DNS-resolve new logdata using logresolve"
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$LOGRESOLVE_BIN" >> "$LOGDATA"
  else
    _dbg "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATA"
  fi

  # Only run the statistics tools when there actually is new log data.
  if [ -s "$LOGDATA" ]; then
    if [ -x "$ANALOG_BIN" ]; then
      _dbg "$WEBSITE: Update analog stats"
      "$ANALOG_BIN" $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
    fi
    if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
      _dbg "$WEBSITE: Update Report Magic stats"
      "$ANALOG_BIN" $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
      "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
    fi
    if [ -x "$WEBALIZER_BIN" ]; then
      _dbg "$WEBSITE: Update Webalog stats"
      zcat -f "$LOGDATA" | "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 -
    fi
    if [ -x "$AWSTATS_BIN" ]; then
      _dbg "$WEBSITE: Update AWStats stats"
      awstats_setlog "$AWSTATS_CFG" "$LOGDATA"
      "$AWSTATS_BIN" -config="$WEBSITE" $AWSTATS_OPTIONS -output > "$STATSDIR/awstats/index.html"
      awstats_setlog "$AWSTATS_CFG" "$LOGDATATMP"
    fi
    gzip -f9 "$LOGDATA"
  fi

  if [ "$stamp" = "init" ]; then
    post_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    post_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_UPDATE."
  fi
done