From 554bf21ff2353107d6290c03ebf8eac14889f409 Mon Sep 17 00:00:00 2001 From: Jonas Smedegaard Date: Sun, 27 Jan 2002 22:44:13 +0000 Subject: Major overhaul of localwebsearch: - Remove unused run-mode options. - Added run-mode "initprep" that marks next run as a full run (inspired by new design of htdig cron routines). - Properly structure run-modes, and use individual lockfiles for each WEBDIR. - Remove leftover stuff from localwebstats. --- localwebsearch | 292 +++++++++++++++------------------------------------------ 1 file changed, 74 insertions(+), 218 deletions(-) (limited to 'localwebsearch') diff --git a/localwebsearch b/localwebsearch index 336d033..7c91266 100755 --- a/localwebsearch +++ b/localwebsearch @@ -6,7 +6,7 @@ set -e function usage() { - echo "Usage: $(basename $0) init|update|prelogrotate|postlogrotate|ignore [...]" + echo "Usage: $(basename $0) initprep|init|update|ignore [...]" echo " If no website is given, all are attempted" echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/" exit 1 @@ -24,7 +24,7 @@ case $(dirname $0) in stamp=update ;; /etc/cron.weekly) - stamp=ignore + stamp=initprep ;; /etc/cron.monthly) stamp=ignore @@ -36,13 +36,7 @@ case $(dirname $0) in esac case "$stamp" in - init) - if [ -f /usr/bin/rundig ]; then - touch /etc/htdig/local_full_refresh - fi - exit 0 - ;; - update|prelogrotate|postlogrotate|ignore) + initprep|init|update|ignore) ;; *) usage @@ -50,276 +44,138 @@ case "$stamp" in esac if [ "$stamp" = "ignore" ]; then - if [ $DEBUG ]; then - echo "Asked to ignore - exiting silently..." - fi + [ $DEBUG ] && echo "Asked to ignore - exiting silently..." exit 0 fi -if ! lockfile-create /var/run/localwebsearch.cron; then - # Another htdig indexing cronjob is already running - exit 0 -fi - -lockfile-touch /var/run/localwebsearch.cron & -# Save the PID of the lockfile-touch process -BADGER="$!" - -if [ -f /usr/bin/rundig ]; then - for cfg in `find /etc/htdig -type f -name *.conf | grep -v "htdig.conf"`; do - if [ -f /etc/htdig/local_full_refresh ]; then - /usr/local/bin/localrundig -i -a - rm /etc/htdig/local_full_refresh - else - /usr/local/bin/localrundig -a - fi - done -fi - -kill "${BADGER}" -lockfile-remove /var/run/localwebsearch.cron - -exit $? - WEBROOT=/var/www LOGROOT=/var/log/apache # Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE -function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; } -function webdirs() { find $1 -type d -mindepth 1 -maxdepth 1 | grep '\.*\.' | sed 's!$1!!'; } -#function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); } -function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; } -function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; } -function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; } -function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; } -function analog_cfg() { echo /etc/analog_$3.conf; } -function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; } -function webalizer_cfg() { echo /etc/webalizer_$3.conf; } +function searchdir() { echo $2/VIRTUAL/search.$(dnsdomainname)/www/$3; } +function webdirs() { find /etc/htdig -type f -name '*.conf' ! -name 'htdig.conf' -exec basename '{}' .conf \; ; } +#function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; } +#function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; } +function htdig_cfg() { echo /etc/htdig/$3.conf; } function pre_init() { true; } function post_init() { true; } function pre_update() { true; } function post_update() { true; } -# The above can be overridden in /etc/local/www - -. /etc/local/www || exit1 "Unable to read prefs file" +# The above can be overridden +LOCALCONFIG=/etc/local/websearch.conf +. $LOCALCONFIG || exit1 "Unable to read local config file $LOCALCONFIG" # variables and functions too boring to be configurable -JDRESOLVE_BIN="/usr/bin/jdresolve" -JDRESOLVE_DB="/var/cache/jdresolve/hosts.db" -#JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48" -# DB access doesn't work currently (no output...) -#JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48" -JDRESOLVE_OPTIONS="-r" -JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB" -LOGRESOLVE_BIN="/usr/sbin/logresolve" -ANALOG_BIN="/usr/bin/analog" -RMAGIC_BIN="/usr/bin/rmagic" -WEBALIZER_BIN="/usr/bin/webalizer" -AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl" +HTDIG_BIN="/usr/bin/rundig" +HTDIG_REAL_BIN="/usr/local/sbin/localrundig" #TODO: Convince Debian maintainer to change official rundig -function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; } +#function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; } # Webiste/independent checks test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist" test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist" -if [ -x $JDRESOLVE_BIN ]; then - touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB" -fi -# Generate stats for websites from stdin or all default sites +# Index searches for websites from stdin or all default sites WEBSITES=$@ if [ "$WEBSITES" = "" ]; then WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE) fi for WEBSITE in $WEBSITES; do - STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR." -# FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist." + SEARCHDIR=$(searchdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve SEARCHDIR." +# FIXME test -d $SEARCHDIR/.. || exit1 "Directory above SEARCHDIR doesn't exist." - ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG." - RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG." - WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG." + HTDIG_CFG=$(htdig_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve HTDIG_CFG." if [ $stamp = "init" ]; then + [ $DEBUG ] && echo "Execute $WEBSITE PRE_INIT" pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT." fi if [ $stamp = "update" ]; then + [ $DEBUG ] && echo "Execute $WEBSITE PRE_UPDATE" pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE." fi - HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host." - DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host." +# HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host." +# DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host." - ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a" - RMAGIC_OPTIONS="" - WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f" - AWSTATS_OPTIONS="-update" - FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config +# HTDIG_OPTIONS="-a" #TODO: This seems to always init currently + HTDIG_OPTIONS="" + [ "$WEBSITE" != "htdig" ] && HTDIG_OPTIONS="$HTDIG_OPTIONS -c $HTDIG_CFG" if [ $DEBUG ]; then - echo "Making stats for $WEBSITE in $STATSDIR:" - JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p" - ANALOG_OPTIONS="$ANALOG_OPTIONS +q" - RMAGIC_OPTIONS="$RMAGIC_OPTIONS" - WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T" - AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps" + echo "Indexing search for $WEBSITE in $SEARCHDIR:" + HTDIG_OPTIONS="$HTDIG_OPTIONS -v -s" else - JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n" - ANALOG_OPTIONS="$ANALOG_OPTIONS -q" - RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE" - WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q" - AWSTATS_OPTIONS="$AWSTATS_OPTIONS" + HTDIG_OPTIONS="$HTDIG_OPTIONS" fi - if [ $stamp = "init" -o $stamp = "init" ]; then - [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR" - rm -rf $STATSDIR - mkdir $STATSDIR + if [ "$stamp" = "init" ]; then + HTDIG_OPTIONS="$HTDIG_OPTIONS -i" fi - LOGDATARESOLVED="$STATSDIR/rawlog_old.txt" - LOGDATA="$STATSDIR/rawlog_new.txt" - LOGDATATMP="$STATSDIR/rawlog_incoming.txt" - touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files." + if ! lockfile-create /var/run/localwebsearch_$WEBSITE; then + # Another htdig indexing cronjob is already running + [ $DEBUG ] && echo "Another $WEBSITE indexing is already running. Exit silently..." + exit 0 + fi - if [ $stamp = "init" ]; then - logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED - if [ -x $ANALOG_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Create/update analog config" + lockfile-touch /var/run/localwebsearch_$WEBSITE & + # Save the PID of the lockfile-touch process + BADGER="$!" + + if [ "$stamp" = "initprep" ]; then + [ $DEBUG ] && echo "Mark next run as a full-scale, and exit silently..." + touch /etc/htdig/full_refresh_$WEBSITE + exit 0 + fi + +# TODO + if [ $stamp = "initXXX" ]; then + [ $DEBUG ] && echo "$WEBSITE: Purge SEARCHDIR" + rm -rf $SEARCHDIR + mkdir $SEARCHDIR + if [ -x $HTDIG_BIN ]; then + [ $DEBUG ] && echo "$WEBSITE: Create/update htdig config" echo "\ # NB! This file is automatically generated. Do not edit directly! -# Instead, put additions/overrides in $ANALOG_CFG.local +# Instead, put additions/overrides in $HTDIG_CFG.local HOSTNAME $HOST.$DOMAIN HOSTURL http://$HOST.$DOMAIN/ BASEURL http://$HOST.$DOMAIN LANGUAGE DANISH "\ - > $ANALOG_CFG - [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG - mkdir $STATSDIR/analog - [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats" - $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html" - fi - if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config" - echo "\ -# NB! This file is automatically generated. Do not edit directly! -# Instead, put additions/overrides in $RMAGIC_CFG.local - -[statistics] -File_In = $STATSDIR/rmagic/report.dat -Frame_File_Out = $STATSDIR/rmagic/index.html -Language = en - -[reports] -File_Out = $STATSDIR/rmagic/ - -[QUICK] -Rows = ALL - -[navigation] -File_Out = navfile.html -"\ - > $RMAGIC_CFG - [ -s $RMAGIC_CFG.local ] && $RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local" - mkdir $STATSDIR/rmagic - [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats" - $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat" - $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG - fi - if [ -x $WEBALIZER_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config" - echo "\ -# NB! This file is automatically generated. Do not edit directly! -# Instead, put additions/overrides in $WEBALIZER_CFG.local - -HostName $HOST.$DOMAIN -HideSite *$DOMAIN -HideReferrer $DOMAIN/ -"\ - > $WEBALIZER_CFG - [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG - mkdir $STATSDIR/webalizer - [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats" - cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true - fi - if [ -x $AWSTATS_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config" - echo "\ -# NB! This file is automatically generated. Do not edit directly! -# Instead, put additions/overrides in $AWSTATS_CFG.local - -LogFile=\"$LOGDATATMP\" -LogFormat=4 -DNSLookup=0 -DirData=\"$STATSDIR/awstats\" -AllowToUpdateStatsFromBrowser=0 -DirCgi=\"http://cgi.jones.dk/cgi-bin\" -DirIcons=\"http://stats.jones.dk/awstats-icon\" -SiteDomain=\"$FQDN_ESC\" -HostAliases=\"$FQDN_ESC\" -Lang=\"dk\" -DirLang=\"/usr/share/awstats/lang\" -DefaultFile=\"index.html\" -SkipHosts=\"\" -SkipFiles=\"\" -ShowLinksOnUrl=1 -ShowFlagLinks=0 -"\ - >/etc/awstats/awstats.$WEBSITE.conf - [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG - mkdir $STATSDIR/awstats - [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats" - awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED - $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html - awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP + > $HTDIG_CFG + [ -s $HTDIG_CFG.local ] && cat $HTDIG_CFG.local >> $HTDIG_CFG + mkdir $SEARCHDIR/htdig fi - [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata" - gzip -9 $LOGDATARESOLVED - fi - - if [ -x $JDRESOLVE_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve" -# DB access doesn't work currently (no output...) -# logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA -# jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA - logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA - elif [ -x $LOGRESOLVE_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve" - logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA - else - [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)" - logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA fi - if [ -s $LOGDATA ]; then - if [ -x $ANALOG_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Update analog stats" - $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html" + if [ $stamp = "init" -o $stamp = "update" ]; then + if [ -x $HTDIG_BIN -a -x $HTDIG_REAL_BIN ]; then + export TMPDIR=/tmp + [ $DEBUG ] && echo "$WEBSITE: Update htdig search" + if [ $stamp = "init" -o -f /etc/htdig/full_refresh_$WEBSITE ]; then + $HTDIG_REAL_BIN $HTDIG_OPTIONS + rm -f /etc/htdig/full_refresh_$WEBSITE + else + $HTDIG_REAL_BIN $HTDIG_OPTIONS + fi fi - if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats" - $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat" - $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG - fi - if [ -x $WEBALIZER_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats" - zcat -f $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - - fi - if [ -x $AWSTATS_BIN ]; then - [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats" - awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA - $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html - awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP - fi - gzip -f9 $LOGDATA fi + if [ $stamp = "init" ]; then + [ $DEBUG ] && echo "Execute $WEBSITE POST_INIT" post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT." fi if [ $stamp = "update" ]; then + [ $DEBUG ] && echo "Execute $WEBSITE POST_UPDATE" post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE." fi + + kill "${BADGER}" + lockfile-remove /var/run/localwebsearch_$WEBSITE + done -- cgit v1.2.3