summary | refs | log | tree | commit | diff
path: root/localwebsearch
diff options
context:
space:
mode:
author: Jonas Smedegaard <dr@jones.dk> 2002-01-27 22:44:13 +0000
committer: Jonas Smedegaard <dr@jones.dk> 2002-01-27 22:44:13 +0000
commit: 554bf21ff2353107d6290c03ebf8eac14889f409 (patch)
tree: a71b3b7a1b7daa5114aaaff7f78cafac4b0ad875 /localwebsearch
parent: 8ddc93bf6bb816de1e279aa8cff379b248cede93 (diff)
Major overhaul of localwebsearch:
- Remove unused run-mode options.
- Add run-mode "initprep" that marks next run as a full run (inspired by new design of htdig cron routines).
- Properly structure run-modes, and use individual lockfiles for each WEBDIR.
- Remove leftover stuff from localwebstats.
Diffstat (limited to 'localwebsearch')
-rwxr-xr-x localwebsearch 292
1 files changed, 74 insertions, 218 deletions
diff --git a/localwebsearch b/localwebsearch
index 336d033..7c91266 100755
--- a/localwebsearch
+++ b/localwebsearch
@@ -6,7 +6,7 @@
set -e
function usage() {
- echo "Usage: $(basename $0) init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
+ echo "Usage: $(basename $0) initprep|init|update|ignore <website> [<website>...]"
echo " If no website is given, all are attempted"
echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
exit 1
@@ -24,7 +24,7 @@ case $(dirname $0) in
stamp=update
;;
/etc/cron.weekly)
- stamp=ignore
+ stamp=initprep
;;
/etc/cron.monthly)
stamp=ignore
@@ -36,13 +36,7 @@ case $(dirname $0) in
esac
case "$stamp" in
- init)
- if [ -f /usr/bin/rundig ]; then
- touch /etc/htdig/local_full_refresh
- fi
- exit 0
- ;;
- update|prelogrotate|postlogrotate|ignore)
+ initprep|init|update|ignore)
;;
*)
usage
@@ -50,276 +44,138 @@ case "$stamp" in
esac
if [ "$stamp" = "ignore" ]; then
- if [ $DEBUG ]; then
- echo "Asked to ignore - exiting silently..."
- fi
+ [ $DEBUG ] && echo "Asked to ignore - exiting silently..."
exit 0
fi
-if ! lockfile-create /var/run/localwebsearch.cron; then
- # Another htdig indexing cronjob is already running
- exit 0
-fi
-
-lockfile-touch /var/run/localwebsearch.cron &
-# Save the PID of the lockfile-touch process
-BADGER="$!"
-
-if [ -f /usr/bin/rundig ]; then
- for cfg in `find /etc/htdig -type f -name *.conf | grep -v "htdig.conf"`; do
- if [ -f /etc/htdig/local_full_refresh ]; then
- /usr/local/bin/localrundig -i -a
- rm /etc/htdig/local_full_refresh
- else
- /usr/local/bin/localrundig -a
- fi
- done
-fi
-
-kill "${BADGER}"
-lockfile-remove /var/run/localwebsearch.cron
-
-exit $?
-
WEBROOT=/var/www
LOGROOT=/var/log/apache
# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
-function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; }
-function webdirs() { find $1 -type d -mindepth 1 -maxdepth 1 | grep '\.*\.' | sed 's!$1!!'; }
-#function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
-function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
-function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
-function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
-function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
-function analog_cfg() { echo /etc/analog_$3.conf; }
-function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; }
-function webalizer_cfg() { echo /etc/webalizer_$3.conf; }
+function searchdir() { echo $2/VIRTUAL/search.$(dnsdomainname)/www/$3; }
+function webdirs() { find /etc/htdig -type f -name '*.conf' ! -name 'htdig.conf' -exec basename '{}' .conf \; ; }
+#function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
+#function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
+function htdig_cfg() { echo /etc/htdig/$3.conf; }
function pre_init() { true; }
function post_init() { true; }
function pre_update() { true; }
function post_update() { true; }
-# The above can be overridden in /etc/local/www
-
-. /etc/local/www || exit1 "Unable to read prefs file"
+# The above can be overridden
+LOCALCONFIG=/etc/local/websearch.conf
+. $LOCALCONFIG || exit1 "Unable to read local config file $LOCALCONFIG"
# variables and functions too boring to be configurable
-JDRESOLVE_BIN="/usr/bin/jdresolve"
-JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
-#JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
-# DB access doesn't work currently (no output...)
-#JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
-JDRESOLVE_OPTIONS="-r"
-JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
-LOGRESOLVE_BIN="/usr/sbin/logresolve"
-ANALOG_BIN="/usr/bin/analog"
-RMAGIC_BIN="/usr/bin/rmagic"
-WEBALIZER_BIN="/usr/bin/webalizer"
-AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
+HTDIG_BIN="/usr/bin/rundig"
+HTDIG_REAL_BIN="/usr/local/sbin/localrundig" #TODO: Convince Debian maintainer to change official rundig
-function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
+#function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
# Webiste/independent checks
test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist"
-if [ -x $JDRESOLVE_BIN ]; then
- touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
-fi
-# Generate stats for websites from stdin or all default sites
+# Index searches for websites from stdin or all default sites
WEBSITES=$@
if [ "$WEBSITES" = "" ]; then
WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)
fi
for WEBSITE in $WEBSITES; do
- STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
-# FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
+ SEARCHDIR=$(searchdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve SEARCHDIR."
+# FIXME test -d $SEARCHDIR/.. || exit1 "Directory above SEARCHDIR doesn't exist."
- ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
- RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
- WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
+ HTDIG_CFG=$(htdig_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve HTDIG_CFG."
if [ $stamp = "init" ]; then
+ [ $DEBUG ] && echo "Execute $WEBSITE PRE_INIT"
pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
fi
if [ $stamp = "update" ]; then
+ [ $DEBUG ] && echo "Execute $WEBSITE PRE_UPDATE"
pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
fi
- HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
- DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
+# HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
+# DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
- ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
- RMAGIC_OPTIONS=""
- WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
- AWSTATS_OPTIONS="-update"
- FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
+# HTDIG_OPTIONS="-a" #TODO: This seems to always init currently
+ HTDIG_OPTIONS=""
+ [ "$WEBSITE" != "htdig" ] && HTDIG_OPTIONS="$HTDIG_OPTIONS -c $HTDIG_CFG"
if [ $DEBUG ]; then
- echo "Making stats for $WEBSITE in $STATSDIR:"
- JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
- ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
- RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
- WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
- AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
+ echo "Indexing search for $WEBSITE in $SEARCHDIR:"
+ HTDIG_OPTIONS="$HTDIG_OPTIONS -v -s"
else
- JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n"
- ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
- RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
- WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
- AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
+ HTDIG_OPTIONS="$HTDIG_OPTIONS"
fi
- if [ $stamp = "init" -o $stamp = "init" ]; then
- [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
- rm -rf $STATSDIR
- mkdir $STATSDIR
+ if [ "$stamp" = "init" ]; then
+ HTDIG_OPTIONS="$HTDIG_OPTIONS -i"
fi
- LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
- LOGDATA="$STATSDIR/rawlog_new.txt"
- LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
- touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
+ if ! lockfile-create /var/run/localwebsearch_$WEBSITE; then
+ # Another htdig indexing cronjob is already running
+ [ $DEBUG ] && echo "Another $WEBSITE indexing is already running. Exit silently..."
+ exit 0
+ fi
- if [ $stamp = "init" ]; then
- logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
- if [ -x $ANALOG_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
+ lockfile-touch /var/run/localwebsearch_$WEBSITE &
+ # Save the PID of the lockfile-touch process
+ BADGER="$!"
+
+ if [ "$stamp" = "initprep" ]; then
+ [ $DEBUG ] && echo "Mark next run as a full-scale, and exit silently..."
+ touch /etc/htdig/full_refresh_$WEBSITE
+ exit 0
+ fi
+
+# TODO
+ if [ $stamp = "initXXX" ]; then
+ [ $DEBUG ] && echo "$WEBSITE: Purge SEARCHDIR"
+ rm -rf $SEARCHDIR
+ mkdir $SEARCHDIR
+ if [ -x $HTDIG_BIN ]; then
+ [ $DEBUG ] && echo "$WEBSITE: Create/update htdig config"
echo "\
# NB! This file is automatically generated. Do not edit directly!
-# Instead, put additions/overrides in $ANALOG_CFG.local
+# Instead, put additions/overrides in $HTDIG_CFG.local
HOSTNAME $HOST.$DOMAIN
HOSTURL http://$HOST.$DOMAIN/
BASEURL http://$HOST.$DOMAIN
LANGUAGE DANISH
"\
- > $ANALOG_CFG
- [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
- mkdir $STATSDIR/analog
- [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
- $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
- fi
- if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
- echo "\
-# NB! This file is automatically generated. Do not edit directly!
-# Instead, put additions/overrides in $RMAGIC_CFG.local
-
-[statistics]
-File_In = $STATSDIR/rmagic/report.dat
-Frame_File_Out = $STATSDIR/rmagic/index.html
-Language = en
-
-[reports]
-File_Out = $STATSDIR/rmagic/
-
-[QUICK]
-Rows = ALL
-
-[navigation]
-File_Out = navfile.html
-"\
- > $RMAGIC_CFG
- [ -s $RMAGIC_CFG.local ] && $RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
- mkdir $STATSDIR/rmagic
- [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
- $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
- $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
- fi
- if [ -x $WEBALIZER_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
- echo "\
-# NB! This file is automatically generated. Do not edit directly!
-# Instead, put additions/overrides in $WEBALIZER_CFG.local
-
-HostName $HOST.$DOMAIN
-HideSite *$DOMAIN
-HideReferrer $DOMAIN/
-"\
- > $WEBALIZER_CFG
- [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
- mkdir $STATSDIR/webalizer
- [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
- cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
- fi
- if [ -x $AWSTATS_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
- echo "\
-# NB! This file is automatically generated. Do not edit directly!
-# Instead, put additions/overrides in $AWSTATS_CFG.local
-
-LogFile=\"$LOGDATATMP\"
-LogFormat=4
-DNSLookup=0
-DirData=\"$STATSDIR/awstats\"
-AllowToUpdateStatsFromBrowser=0
-DirCgi=\"http://cgi.jones.dk/cgi-bin\"
-DirIcons=\"http://stats.jones.dk/awstats-icon\"
-SiteDomain=\"$FQDN_ESC\"
-HostAliases=\"$FQDN_ESC\"
-Lang=\"dk\"
-DirLang=\"/usr/share/awstats/lang\"
-DefaultFile=\"index.html\"
-SkipHosts=\"\"
-SkipFiles=\"\"
-ShowLinksOnUrl=1
-ShowFlagLinks=0
-"\
- >/etc/awstats/awstats.$WEBSITE.conf
- [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
- mkdir $STATSDIR/awstats
- [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
- awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED
- $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
- awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
+ > $HTDIG_CFG
+ [ -s $HTDIG_CFG.local ] && cat $HTDIG_CFG.local >> $HTDIG_CFG
+ mkdir $SEARCHDIR/htdig
fi
- [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
- gzip -9 $LOGDATARESOLVED
- fi
-
- if [ -x $JDRESOLVE_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
-# DB access doesn't work currently (no output...)
-# logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
-# jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
- logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
- elif [ -x $LOGRESOLVE_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
- logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
- else
- [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
- logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
fi
- if [ -s $LOGDATA ]; then
- if [ -x $ANALOG_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
- $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
+ if [ $stamp = "init" -o $stamp = "update" ]; then
+ if [ -x $HTDIG_BIN -a -x $HTDIG_REAL_BIN ]; then
+ export TMPDIR=/tmp
+ [ $DEBUG ] && echo "$WEBSITE: Update htdig search"
+ if [ $stamp = "init" -o -f /etc/htdig/full_refresh_$WEBSITE ]; then
+ $HTDIG_REAL_BIN $HTDIG_OPTIONS
+ rm -f /etc/htdig/full_refresh_$WEBSITE
+ else
+ $HTDIG_REAL_BIN $HTDIG_OPTIONS
+ fi
fi
- if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
- $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
- $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
- fi
- if [ -x $WEBALIZER_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
- zcat -f $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
- fi
- if [ -x $AWSTATS_BIN ]; then
- [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
- awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
- $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
- awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
- fi
- gzip -f9 $LOGDATA
fi
+
if [ $stamp = "init" ]; then
+ [ $DEBUG ] && echo "Execute $WEBSITE POST_INIT"
post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
fi
if [ $stamp = "update" ]; then
+ [ $DEBUG ] && echo "Execute $WEBSITE POST_UPDATE"
post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
fi
+
+ kill "${BADGER}"
+ lockfile-remove /var/run/localwebsearch_$WEBSITE
+
done