summaryrefslogtreecommitdiff
path: root/localwebsearch
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2002-01-21 04:06:50 +0000
committerJonas Smedegaard <dr@jones.dk>2002-01-21 04:06:50 +0000
commit368a3a19ed6f6026cd3817e8f714f71b11601167 (patch)
treeaf1b813e9d8bdd385b7516f354dec438bb6eea03 /localwebsearch
parent5148c4cc699884c13895992a889d397afc60c64f (diff)
Changes to localrundig and begin work on localwebsearch (works from cron only).
Diffstat (limited to 'localwebsearch')
-rwxr-xr-xlocalwebsearch325
1 files changed, 325 insertions, 0 deletions
diff --git a/localwebsearch b/localwebsearch
new file mode 100755
index 0000000..336d033
--- /dev/null
+++ b/localwebsearch
@@ -0,0 +1,325 @@
+#!/bin/bash
+# /etc/cron.daily/localwebsearch: Websearch maintenance script
+# Written by Jonas Smedegaard <dr@jones.dk>
+
+# halt on errors (set -e itself is POSIX; the bashisms are the "function" definitions)
+set -e
+
+function usage() { # Print the synopsis and exit 1; ${0##*/} = basename of $0, safe with spaces.
+  echo "Usage: ${0##*/} init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
+  echo " If no website is given, all are attempted"
+  echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
+  exit 1
+}
+
+function exit1() { # Report the fatal error described by "$1" and exit with status 1.
+  # Write to stderr: callers running inside $(...) would otherwise swallow the message.
+  printf 'Error: %s\nExiting...\n' "$1" >&2
+  exit 1
+}
+
+# automagically configure when run from cron dirs: the directory the script
+# was started from selects the action; anywhere else the first argument does
+case "$(dirname "$0")" in
+  /etc/cron.daily)
+    stamp=update
+    ;;
+  # weekly and monthly runs map to "ignore", i.e. they exit without doing work
+  /etc/cron.weekly|/etc/cron.monthly)
+    stamp=ignore
+    ;;
+  *)
+    # With no arguments at all "shift" fails, which leads to the usage text.
+    stamp=$1
+    shift || usage
+    ;;
+esac
+
+# Validate the requested action; "init" is completed right here.
+case "$stamp" in
+  update|prelogrotate|postlogrotate|ignore) # known action, handled further down
+    ;;
+  init)
+    # Flag file: while it exists the cron run calls localrundig with -i added.
+    [ ! -f /usr/bin/rundig ] || touch /etc/htdig/local_full_refresh
+    exit 0
+    ;;
+  *)
+    usage
+    ;;
+esac
+
+# "ignore" (also what the weekly/monthly cron directories map to) is a no-op.
+if [ "$stamp" = "ignore" ]; then
+  # DEBUG is quoted: unquoted, a multi-word value makes "[" fail with an error.
+  [ -z "$DEBUG" ] || echo "Asked to ignore - exiting silently..."
+  exit 0
+fi
+
+# Only one indexing run at a time: leave quietly if the lock is already held.
+lockfile-create /var/run/localwebsearch.cron || exit 0
+# Keep the lock fresh in the background; BADGER is the PID of that lockfile-touch.
+lockfile-touch /var/run/localwebsearch.cron &
+BADGER="$!"
+
+# Release the lock on every exit path. Before, "set -e" aborting on a failed
+# localrundig or kill left a stale lock and an orphaned lockfile-touch behind.
+function unlock() { kill "${BADGER}" 2>/dev/null || true; lockfile-remove /var/run/localwebsearch.cron; }
+trap unlock EXIT
+if [ -f /usr/bin/rundig ]; then
+  # '*.conf' is quoted so the shell cannot expand it; "! -name" skips only htdig.conf itself.
+  # NOTE(review): $cfg is unused, so localrundig simply runs once per config found.
+  for cfg in $(find /etc/htdig -type f -name '*.conf' ! -name 'htdig.conf'); do
+    if [ -f /etc/htdig/local_full_refresh ]; then
+      /usr/local/bin/localrundig -i -a
+      rm /etc/htdig/local_full_refresh
+    else
+      /usr/local/bin/localrundig -a
+    fi
+  done
+fi
+# Stop here (the EXIT trap releases the lock); nothing below this line is reached.
+exit 0
+
+WEBROOT=/var/www # default web root; VIRTUAL/<website>/hostname and domainname are read below it
+LOGROOT=/var/log/apache # default log root; website log directories are looked up below it
+# Default helpers; every one of them is called with the same three arguments.
+# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
+function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; }
+function webdirs() { find "$1" -mindepth 1 -maxdepth 1 -type d | grep '\.*\.' | sed "s!^$1/!!"; } # double quotes: sed needs the real $1 to strip it
+#function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
+function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
+function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
+function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
+function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
+function analog_cfg() { echo /etc/analog_$3.conf; }
+function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; }
+function webalizer_cfg() { echo /etc/webalizer_$3.conf; }
+function pre_init() { true; } # the four hooks are no-ops by default
+function post_init() { true; }
+function pre_update() { true; }
+function post_update() { true; }
+
+# The defaults above can be overridden in /etc/local/www, which is sourced
+# right here; failing to read that file is fatal.
+. /etc/local/www || exit1 "Unable to read prefs file"
+
+# variables and functions too boring to be configurable (set after the prefs file, so these values win)
+JDRESOLVE_BIN="/usr/bin/jdresolve"
+JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
+#JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
+# DB access doesn't work currently (no output...)
+#JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
+JDRESOLVE_OPTIONS="-r"
+JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB" # so far only referenced from commented-out code
+LOGRESOLVE_BIN="/usr/sbin/logresolve"
+ANALOG_BIN="/usr/bin/analog"
+RMAGIC_BIN="/usr/bin/rmagic"
+WEBALIZER_BIN="/usr/bin/webalizer"
+AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
+
+# Rewrite the LogFile= line of config file $1 to point at $2; $1 is only replaced when sed succeeded.
+function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" "$1" > "$1.tmp" && mv "$1.tmp" "$1"; }
+# Website-independent checks. The paths are quoted because an empty value
+# would otherwise leave "test -d" with a single argument, which always succeeds.
+test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
+test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"
+if [ -x "$JDRESOLVE_BIN" ]; then
+  touch "$JDRESOLVE_DB" || exit1 "Couldn't touch JDRESOLVE_DB"
+fi
+# Generate stats for the websites named as arguments, or for all default sites
+WEBSITES="$*"
+if [ "$WEBSITES" = "" ]; then
+  WEBSITES=$(webdirs "$LOGROOT" "$WEBROOT") # no WEBSITE argument: none is selected yet
+fi
+
+# Generate the statistics for each website in turn.
+# NOTE(review): not reached at present - the script always exits before this loop.
+for WEBSITE in $WEBSITES; do
+  STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
+# FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
+  ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
+  RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
+  WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
+  # AWSTATS_CFG used to be referenced without ever being set.
+  AWSTATS_CFG=/etc/awstats/awstats.$WEBSITE.conf
+  if [ "$stamp" = "init" ]; then
+    pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
+  fi
+  if [ "$stamp" = "update" ]; then
+    pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
+  fi
+  HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
+  DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
+  ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
+  RMAGIC_OPTIONS=""
+  WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
+  AWSTATS_OPTIONS="-update"
+  FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
+  # jdresolve flags are rebuilt per site from the base value, so they no longer pile up across iterations.
+  if [ $DEBUG ]; then
+    echo "Making stats for $WEBSITE in $STATSDIR:"
+    JDRESOLVE_SITE_OPTIONS="$JDRESOLVE_OPTIONS -p"
+    ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
+    RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
+    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
+    AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
+  else
+    JDRESOLVE_SITE_OPTIONS="$JDRESOLVE_OPTIONS -n" # was "$JDRESOLVE_DB -n", which dropped the base options
+    ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
+    RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
+    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
+    AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
+  fi
+
+  if [ "$stamp" = "init" ]; then
+    [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
+    rm -rf -- "${STATSDIR:?}" # :? aborts instead of running rm -rf on an empty path
+    mkdir "$STATSDIR"
+  fi
+
+  LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
+  LOGDATA="$STATSDIR/rawlog_new.txt"
+  LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
+  touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
+
+  if [ "$stamp" = "init" ]; then # first-time setup: write the tool configs and build initial reports
+    logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
+    if [ -x $ANALOG_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
+      echo "\
+# NB! This file is automatically generated. Do not edit directly!
+# Instead, put additions/overrides in $ANALOG_CFG.local
+
+HOSTNAME $HOST.$DOMAIN
+HOSTURL http://$HOST.$DOMAIN/
+BASEURL http://$HOST.$DOMAIN
+LANGUAGE DANISH
+"\
+      > $ANALOG_CFG
+      [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
+      mkdir $STATSDIR/analog
+      [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
+      $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
+    fi
+    if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
+      echo "\
+# NB! This file is automatically generated. Do not edit directly!
+# Instead, put additions/overrides in $RMAGIC_CFG.local
+
+[statistics]
+File_In = $STATSDIR/rmagic/report.dat
+Frame_File_Out = $STATSDIR/rmagic/index.html
+Language = en
+
+[reports]
+File_Out = $STATSDIR/rmagic/
+
+[QUICK]
+Rows = ALL
+
+[navigation]
+File_Out = navfile.html
+"\
+      > $RMAGIC_CFG
+      [ -s $RMAGIC_CFG.local ] && RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local" # no "$" on the assigned name
+      mkdir $STATSDIR/rmagic
+      [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
+      $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
+      $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
+    fi
+    if [ -x $WEBALIZER_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
+      echo "\
+# NB! This file is automatically generated. Do not edit directly!
+# Instead, put additions/overrides in $WEBALIZER_CFG.local
+
+HostName $HOST.$DOMAIN
+HideSite *$DOMAIN
+HideReferrer $DOMAIN/
+"\
+      > $WEBALIZER_CFG
+      [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
+      mkdir $STATSDIR/webalizer
+      [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
+      cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
+    fi
+    if [ -x $AWSTATS_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
+      echo "\
+# NB! This file is automatically generated. Do not edit directly!
+# Instead, put additions/overrides in $AWSTATS_CFG.local
+
+LogFile=\"$LOGDATATMP\"
+LogFormat=4
+DNSLookup=0
+DirData=\"$STATSDIR/awstats\"
+AllowToUpdateStatsFromBrowser=0
+DirCgi=\"http://cgi.jones.dk/cgi-bin\"
+DirIcons=\"http://stats.jones.dk/awstats-icon\"
+SiteDomain=\"$FQDN_ESC\"
+HostAliases=\"$FQDN_ESC\"
+Lang=\"dk\"
+DirLang=\"/usr/share/awstats/lang\"
+DefaultFile=\"index.html\"
+SkipHosts=\"\"
+SkipFiles=\"\"
+ShowLinksOnUrl=1
+ShowFlagLinks=0
+"\
+      > $AWSTATS_CFG
+      [ -s $AWSTATS_CFG.local ] && cat $AWSTATS_CFG.local >> $AWSTATS_CFG # was a copy of the Webalizer line
+      mkdir $STATSDIR/awstats
+      [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
+      awstats_setlog $AWSTATS_CFG $LOGDATARESOLVED
+      $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
+      awstats_setlog $AWSTATS_CFG $LOGDATATMP
+    fi
+    [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
+    gzip -9 $LOGDATARESOLVED
+  fi
+
+  if [ -x $JDRESOLVE_BIN ]; then # resolver preference: jdresolve, then logresolve, else none
+    [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
+# DB access doesn't work currently (no output...)
+# logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
+# jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
+    logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_SITE_OPTIONS - >> $LOGDATA
+  elif [ -x $LOGRESOLVE_BIN ]; then
+    [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
+    logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
+  else
+    [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
+    logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
+  fi
+
+  if [ -s $LOGDATA ]; then # only refresh the reports when there is new log data
+    if [ -x $ANALOG_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
+      $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
+    fi
+    if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
+      $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
+      $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
+    fi
+    if [ -x $WEBALIZER_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Update Webalizer stats"
+      zcat -f $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
+    fi
+    if [ -x $AWSTATS_BIN ]; then
+      [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
+      awstats_setlog $AWSTATS_CFG $LOGDATA
+      $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
+      awstats_setlog $AWSTATS_CFG $LOGDATATMP
+    fi
+    gzip -f9 $LOGDATA
+  fi
+  if [ "$stamp" = "init" ]; then
+    post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
+  fi
+  if [ "$stamp" = "update" ]; then
+    post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
+  fi
+done