#!/bin/bash
#
# /usr/local/sbin/localwebstats
# Copyright 2001-2002 Jonas Smedegaard
#
# $Id: localwebstats,v 1.40 2004-12-29 17:14:23 jonas Exp $
#
# Webstats maintenance script
#
# Usage: localwebstats init|update|prelogrotate|postlogrotate|ignore [...]
#   Remaining arguments name the websites to process; with none, every
#   website reported by webdirs() is processed.
#   When invoked through /etc/cron.daily the action is "update"; through
#   /etc/cron.weekly or /etc/cron.monthly it is "ignore".
#
# Environment:
#   DEBUG   if non-empty, progress messages are printed and the stats tools
#           are run with their verbose options.
#
# Example config file (/etc/local/webstats.conf).
#
# --- CUT --- CUT --- CUT ---
# #!/bin/sh
#
# # * Websites are in /home/<user>/websites/<website>
# # * Apache httpd.conf has this line added:
# #     Include /etc/apache/vhosts.d
# # * Apache use /usr/local/bin/parselog into /var/log/apache-vhosts/<website>
# # * /usr/lib/apache/suexec is recompiled using /usr/local/bin/make-suexec-for-home
# # * Each webhost has apache config in /etc/apache/vhosts.d/<website>
# # * /etc/apache/vhosts.d/<website> has hints about host- and domain-part of fqdn:
# #     # webstats: hostname: <hostname>
# #     # webstats: domainname: <domainname>
#
# #WEBALIZER_OPTIONS="-Q"
#
# LOGROOT='/var/log/apache-vhosts'
# WEBROOT='/home'
#
# function statsdir() { echo /home/jonas/websites/stats.$(dnsdomainname)/$3; }
# function webdirs() { find /etc/apache/vhosts.d/ -type f -exec egrep '^#\W*webstats:' '{}' -l ';' | xargs basename; }
# function host() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*hostname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
# function domain() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*domainname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
#
# --- CUT --- CUT --- CUT ---
#
# TODO: Run as non-privileged user
#

# halt on errors
set -e

# Print the usage text on stderr and exit with status 1.
function usage() {
  echo "Usage: $(basename "$0") init|update|prelogrotate|postlogrotate|ignore [...]" >&2
  echo "  If no website is given, all are attempted" >&2
  echo "  Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/" >&2
  exit 1
}

# Print error message $1 on stderr and exit with status 1.
function exit1() {
  echo "Error: $1" >&2
  echo "Exiting..." >&2
  exit 1
}

# Print the arguments on stdout when DEBUG is non-empty; always succeeds so
# it is safe as the last command of a block under "set -e".
function debug() {
  [ -z "$DEBUG" ] || echo "$*"
}

# automagically configure when run from cron dirs
case "$(dirname "$0")" in
  /etc/cron.daily) stamp=update ;;
  /etc/cron.weekly) stamp=ignore ;;
  /etc/cron.monthly) stamp=ignore ;;
  *)
    stamp=$1
    shift || usage
    ;;
esac

case "$stamp" in
  init | update | prelogrotate | postlogrotate | ignore) ;;
  *) usage ;;
esac

if [ "$stamp" = "ignore" ]; then
  debug "Asked to ignore - exiting silently..."
  exit 0
fi

# Defaults. ROOT non-empty means: chown the ModLogAn dirs and run modlogan
# through "su $user".
ROOT=1
user=www-data
group=www-data
LOGROOT=/var/log/apache-vhosts
WEBROOT=/var/www

# Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE

# Print the directory that receives the generated statistics for website $3.
function statsdir() { echo "$2/VIRTUAL/stats.$(dnsdomainname)/www/$3"; }

# Print the names (not paths) of the directories directly below $1 whose
# name contains a dot.  Global find options come first so GNU find does not
# warn; the trailing sed keeps the pipeline status at 0 when nothing matches.
function webdirs() {
  find "$1" -mindepth 1 -maxdepth 1 -type d -name '*.*' | sed 's!^.*/!!'
}

#function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }

# Print the content of the log files in $1/$3 named ????.??.00[.gz]
# (gzipped ones first, each group in sorted order).
function logcontentresolved() {
  local file
  find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00.gz' -type f -follow | sort \
    | while IFS= read -r file; do zcat "$file"; done
  find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.00' -type f -follow | sort \
    | while IFS= read -r file; do cat "$file"; done
}

# Print the content of the log files in $1/$3 named ????.??.??[.gz], except
# those ending in 00[.gz] (gzipped ones first, each group in sorted order).
function logcontent() {
  local file
  find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??.gz' ! -name '*00.gz' -type f -follow | sort \
    | while IFS= read -r file; do zcat "$file"; done
  find "$1/$3" -mindepth 1 -maxdepth 1 -name '????.??.??' ! -name '*00' -type f -follow | sort \
    | while IFS= read -r file; do cat "$file"; done
}

# Print the host part / domain part of the website's FQDN.
function host() { cat "$2/VIRTUAL/$3/hostname" || exit1 "Unable to get hostname for virtual host."; }
function domain() { cat "$2/VIRTUAL/$3/domainname" || exit1 "Unable to get domainname for virtual host."; }

# Print the path of the generated per-website config file of each tool.
function analog_cfg() { echo "/etc/analog_$3.conf"; }
function rmagic_cfg() { echo "/etc/rmagic/rmagic_$3.conf"; }
function webalizer_cfg() { echo "/etc/webalizer_$3.conf"; }
function modlogan_cfg() { echo "/etc/modlogan/modlogan_$3.conf"; }

# Hooks run before/after the "init" and "update" actions.
function pre_init() { true; }

# Copy COMMON/index.html (sibling of the stats dir) into the stats dir if it
# exists; never fails.
function post_init() {
  local dir
  dir=$(statsdir "$1" "$2" "$3")
  if [ -f "$dir/../COMMON/index.html" ]; then
    cp "$dir/../COMMON/index.html" "$dir/" || true
  fi
}

function pre_update() { true; }
function post_update() { true; }

# The above can be overridden
LOCALCONFIG=/etc/local/webstats.conf
# shellcheck source=/dev/null
. "$LOCALCONFIG" || echo "WARNING: Unable to read config file $LOCALCONFIG" >&2

# variables and functions too boring to be configurable
JDRESOLVE_BIN="/usr/bin/jdresolve"
JDRESOLVE_OPTIONS="-r -t 5"
JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
JDRESOLVE_EXPIRY="48"
LOGRESOLVE_BIN="/usr/sbin/logresolve"
ANALOG_BIN="/usr/bin/analog"
RMAGIC_BIN="/usr/bin/rmagic"
WEBALIZER_BIN="/usr/bin/webalizer"
MODLOGAN_BIN="/usr/bin/modlogan"
AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"

# Rewrite the LogFile= line of AWStats config $1 to point at $2.
function awstats_setlog() {
  sed -e "s!^\(LogFile=\).*\$!\\1$2!" "$1" > "$1.tmp"
  mv "$1.tmp" "$1"
}

# Generate the AWStats report for the current website from log file $1, then
# point the config back at the incoming-log placeholder.
# Globals read: AWSTATS_BIN AWSTATS_CFG AWSTATS_OPTIONS WEBSITE STATSDIR LOGDATATMP
function awstats_report() {
  awstats_setlog "$AWSTATS_CFG" "$1"
  # shellcheck disable=SC2086 # options are a whitespace-separated list
  "$AWSTATS_BIN" -config="$WEBSITE" $AWSTATS_OPTIONS -output > "$STATSDIR/awstats/index.html"
  awstats_setlog "$AWSTATS_CFG" "$LOGDATATMP"
}

# Feed log file $1 to modlogan on stdin.  MODLOGAN_TAIL holds shell syntax
# (redirection and a filter pipeline), so the command line must always be
# interpreted by a shell: via "su $user" when ROOT is set, plain sh otherwise.
# Globals read: MODLOGAN_BIN MODLOGAN_OPTIONS MODLOGAN_TAIL ROOT user
function run_modlogan() {
  local cmd="$MODLOGAN_BIN $MODLOGAN_OPTIONS $MODLOGAN_TAIL"
  if [ -n "$ROOT" ]; then
    su "$user" -c "$cmd" < "$1"
  else
    sh -c "$cmd" < "$1"
  fi
}

# Website/independent checks
test -d "$WEBROOT" || exit1 "Webroot \"$WEBROOT\" doesn't exist"
test -d "$LOGROOT" || exit1 "Logroot \"$LOGROOT\" doesn't exist"

if [ -x "$JDRESOLVE_BIN" ] && [ -n "$JDRESOLVE_DB" ]; then
  JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS --database=$JDRESOLVE_DB --dbfirst"
  test -d "$(dirname "$JDRESOLVE_DB")" || exit1 "Cache dir for jdresolve doesn't exist"
  if [ -n "$JDRESOLVE_EXPIRY" ]; then
    # shellcheck disable=SC2086 # options are a whitespace-separated list
    if [ -n "$DEBUG" ]; then
      "$JDRESOLVE_BIN" $JDRESOLVE_OPTIONS --expiredb="$JDRESOLVE_EXPIRY"
    else
      "$JDRESOLVE_BIN" $JDRESOLVE_OPTIONS --expiredb="$JDRESOLVE_EXPIRY" > /dev/null 2>&1
    fi
  fi
fi

# Verbosity flag for the per-website jdresolve runs.  Added once here, after
# the expiry run, instead of once per website so it does not pile up.
if [ -n "$DEBUG" ]; then
  JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
else
  JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -n"
fi

# Generate stats for websites given as arguments or all default sites
WEBSITES="$*"
if [ -z "$WEBSITES" ]; then
  WEBSITES=$(webdirs "$LOGROOT" "$WEBROOT")
fi

# shellcheck disable=SC2086 # WEBSITES is a whitespace-separated list
for WEBSITE in $WEBSITES; do
  STATSDIR=$(statsdir "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve STATSDIR."
  [ -n "$STATSDIR" ] || exit1 "Unable to resolve STATSDIR."
  # Check the parent by name: "$STATSDIR/.." cannot be resolved while
  # STATSDIR itself is missing, which is the case on a first "init".
  test -d "$(dirname "$STATSDIR")" || exit1 "Directory above STATSDIR doesn't exist."
  ANALOG_CFG=$(analog_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve ANALOG_CFG."
  RMAGIC_CFG=$(rmagic_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve RMAGIC_CFG."
  WEBALIZER_CFG=$(webalizer_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve WEBALIZER_CFG."
  MODLOGAN_CFG=$(modlogan_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve MODLOGAN_CFG."
  AWSTATS_CFG="/etc/awstats/awstats.$WEBSITE.conf"

  if [ "$stamp" = "init" ]; then
    debug "Execute $WEBSITE PRE_INIT"
    pre_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    debug "Execute $WEBSITE PRE_UPDATE"
    pre_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_UPDATE."
  fi

  HOST=$(host "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get hostname for virtual host."
  DOMAIN=$(domain "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get domainname for virtual host."

  ANALOG_OPTIONS="-G +g/etc/analog.cfg +g$ANALOG_CFG +A -a"
  RMAGIC_OPTIONS=""
  WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  MODLOGAN_OPTIONS="-c $MODLOGAN_CFG"
  # Shell fragment appended to the modlogan command line: drop the routine
  # chatter, and do not fail when the filter leaves no lines.
  MODLOGAN_TAIL="2>&1 | grep -E -v '^(modlogan [\.0-9]+|.+: startup - finished|[[:space:]]*(\[\.*|[[:space:]]+[0-9]+|\.*\])+|.*unknown country code: (gbl|lcl|tld) .+|.+: No such file or directory, first run \?)$' || true"
  AWSTATS_OPTIONS="-update"
  # Dots backslash-escaped: needed for awstats config and modlogan hidereferrer
  FQDN_ESC=$(echo "$HOST.$DOMAIN" | sed -e 's/\./\\./g')
  DOMAIN_ESC=$(echo "$DOMAIN" | sed -e 's/\./\\./g')

  if [ -n "$DEBUG" ]; then
    echo "Making stats for $WEBSITE in $STATSDIR:"
    ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
    MODLOGAN_TAIL=""
    AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  else
    ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
    RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
    WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  fi

  if [ "$stamp" = "init" ]; then
    debug "$WEBSITE: Purge STATSDIR"
    rm -rf -- "${STATSDIR:?}"
    mkdir "$STATSDIR"
  fi

  LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
  LOGDATA="$STATSDIR/rawlog_new.txt"
  LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
  touch "$LOGDATARESOLVED" "$LOGDATA" "$LOGDATATMP" || exit1 "Couldn't touch LOGDATA files."

  #FIXME run init if folders doesn't exist
  if [ "$stamp" = "init" ]; then
    logcontentresolved "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATARESOLVED"

    if [ -x "$ANALOG_BIN" ]; then
      debug "$WEBSITE: Create/update analog config"
      cat > "$ANALOG_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $ANALOG_CFG.local

HOSTNAME $HOST.$DOMAIN
HOSTURL http://$HOST.$DOMAIN/
BASEURL http://$HOST.$DOMAIN
LANGUAGE DANISH

EOF
      if [ -s "$ANALOG_CFG.local" ]; then
        cat "$ANALOG_CFG.local" >> "$ANALOG_CFG"
      fi
      [ -d "$STATSDIR/analog" ] || mkdir "$STATSDIR/analog"
      debug "$WEBSITE: Create initial analog stats"
      rm -f "$STATSDIR/analog/cache.data"
      # shellcheck disable=SC2086 # options are a whitespace-separated list
      "$ANALOG_BIN" $ANALOG_OPTIONS \
        -C"LOGFILE none" \
        -C"LOGFILE $LOGDATARESOLVED" \
        -C"CACHEOUTFILE $STATSDIR/analog/cache.data" \
        -C"OUTFILE $STATSDIR/analog/index.html"
    fi

    if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
      debug "$WEBSITE: Create/update Report Magic config"
      cat > "$RMAGIC_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $RMAGIC_CFG.local

[statistics]
File_In = $STATSDIR/rmagic/report.dat
Frame_File_Out = $STATSDIR/rmagic/index.html
Language = en

[reports]
File_Out = $STATSDIR/rmagic/

[QUICK]
Rows = ALL

[navigation]
File_Out = navfile.html

EOF
      # NOTE(review): the include is only added during "init", as in the
      # original; plain "update" runs do not pass it - confirm whether
      # that is intended.
      if [ -s "$RMAGIC_CFG.local" ]; then
        RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
      fi
      [ -d "$STATSDIR/rmagic" ] || mkdir "$STATSDIR/rmagic"
      debug "$WEBSITE: Create initial Report Magic stats"
      # shellcheck disable=SC2086 # options are a whitespace-separated list
      "$ANALOG_BIN" $ANALOG_OPTIONS \
        -C"LOGFILE none" \
        -C"CACHEFILE $STATSDIR/analog/cache.data" \
        -C"LANGUAGE ENGLISH" \
        -C"OUTPUT COMPUTER" \
        -C"OUTFILE $STATSDIR/rmagic/report.dat"
      # shellcheck disable=SC2086
      "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
    fi

    if [ -x "$WEBALIZER_BIN" ]; then
      debug "$WEBSITE: Create/update Webalizer config"
      cat > "$WEBALIZER_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $WEBALIZER_CFG.local

HostName $HOST.$DOMAIN
HideSite *$DOMAIN
HideReferrer $DOMAIN/

EOF
      if [ -s "$WEBALIZER_CFG.local" ]; then
        cat "$WEBALIZER_CFG.local" >> "$WEBALIZER_CFG"
      fi
      [ -d "$STATSDIR/webalizer" ] || mkdir "$STATSDIR/webalizer"
      debug "$WEBSITE: Create initial Webalizer stats"
      # The initial run is best-effort: a webalizer failure is ignored here.
      # shellcheck disable=SC2086 # options are a whitespace-separated list
      "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 - < "$LOGDATARESOLVED" || true
    fi

    if [ -x "$MODLOGAN_BIN" ]; then
      debug "$WEBSITE: Create/update ModLogAn config"
      cat > "$MODLOGAN_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $MODLOGAN_CFG.local

[global]
includepath = /etc/modlogan
include = modlogan.def.conf,global
loadplugin = input_clf
loadplugin = processor_web
loadplugin = output_modlogan
loadplugin = output_template
statedir=$STATSDIR/modlogan
incremental = 0
debug_level = 0
enable_resolver = 0
#read_ahead_limit = 1
var(outputdir, \$statedir)

[processor_web]
include = group.url.conf,group_exploits
include = modlogan.def.conf,processor_web
debug_searchengines = 0
debug_visits = 0
hidereferrer = "\.$DOMAIN_ESC/"

[output_modlogan]
include = modlogan.def.conf, output_modlogan
hostname = $HOST.$DOMAIN
outputdir=\$outputdir

[output_template]
include = modlogan.def.conf, output_template
include = modlogan.def.conf, output_template_reports_web
include = modlogan.def.conf, output_template_menu_web
template_path = /usr/local/share/modlogan/themes/
template_name = basic
variable = HOSTNAME,$HOST.$DOMAIN
variable = CHARSET,iso-8859-1
variable = LANGUAGE,da
hostname = $HOST.$DOMAIN
outputdir=\$outputdir-test

[input_clf]
include = modlogan.def.conf,input_clf
inputfile = -

EOF
      if [ -s "$MODLOGAN_CFG.local" ]; then
        cat "$MODLOGAN_CFG.local" >> "$MODLOGAN_CFG"
      fi
      for dir in "$STATSDIR/modlogan" "$STATSDIR/modlogan-test"; do
        [ -d "$dir" ] || mkdir "$dir"
        if [ -n "$ROOT" ]; then
          chown "$user:$group" "$dir"
        fi
      done
      debug "$WEBSITE: Create initial ModLogAn stats"
      run_modlogan "$LOGDATARESOLVED"
    fi

    if [ -x "$AWSTATS_BIN" ]; then
      debug "$WEBSITE: Create/update AWStats config"
      cat > "$AWSTATS_CFG" <<EOF
# NB! This file is automatically generated. Do not edit directly!
# Instead, put additions/overrides in $AWSTATS_CFG.local

LogFile="$LOGDATATMP"
LogFormat=1
DNSLookup=0
DirData="$STATSDIR/awstats"
AllowToUpdateStatsFromBrowser=0
DirCgi="http://cgi.jones.dk/cgi-bin"
DirIcons="http://stats.jones.dk/awstats-icon"
SiteDomain="$FQDN_ESC"
HostAliases="$FQDN_ESC"
Lang="dk"
DirLang="/usr/share/awstats/lang"
DefaultFile="index.html"
SkipHosts=""
SkipFiles=""
ShowLinksOnUrl=1
ShowFlagLinks=0

EOF
      # Honour the overrides file announced in the generated header, the
      # same way the other tools' configs do.
      if [ -s "$AWSTATS_CFG.local" ]; then
        cat "$AWSTATS_CFG.local" >> "$AWSTATS_CFG"
      fi
      [ -d "$STATSDIR/awstats" ] || mkdir "$STATSDIR/awstats"
      debug "$WEBSITE: Create initial AWStats stats"
      awstats_report "$LOGDATARESOLVED"
    fi

    debug "$WEBSITE: Compress DNS-resolved logdata"
    gzip -9 "$LOGDATARESOLVED"
  fi

  # Merge the not-yet-resolved log files into LOGDATA, DNS-resolving them
  # with whichever resolver is installed.
  # shellcheck disable=SC2086 # options are a whitespace-separated list
  if [ -x "$JDRESOLVE_BIN" ]; then
    debug "$WEBSITE: DNS-resolve new logdata using jdresolve"
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$JDRESOLVE_BIN" $JDRESOLVE_OPTIONS - >> "$LOGDATA"
  elif [ -x "$LOGRESOLVE_BIN" ]; then
    debug "$WEBSITE: DNS-resolve new logdata using logresolve"
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$LOGRESOLVE_BIN" >> "$LOGDATA"
  else
    debug "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
    logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATA"
  fi

  if [ -s "$LOGDATA" ]; then
    if [ -x "$ANALOG_BIN" ]; then
      debug "$WEBSITE: Update analog stats"
      # shellcheck disable=SC2086
      "$ANALOG_BIN" $ANALOG_OPTIONS \
        -C"LOGFILE none" \
        -C"CACHEFILE $STATSDIR/analog/cache.data" \
        -C"LOGFILE $LOGDATA" \
        -C"OUTFILE $STATSDIR/analog/index.html"
    fi
    if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
      debug "$WEBSITE: Update Report Magic stats"
      # shellcheck disable=SC2086
      "$ANALOG_BIN" $ANALOG_OPTIONS \
        -C"LOGFILE none" \
        -C"CACHEFILE $STATSDIR/analog/cache.data" \
        -C"LOGFILE $LOGDATA" \
        -C"LANGUAGE ENGLISH" \
        -C"OUTPUT COMPUTER" \
        -C"OUTFILE $STATSDIR/rmagic/report.dat"
      # shellcheck disable=SC2086
      "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
    fi
    if [ -x "$WEBALIZER_BIN" ]; then
      debug "$WEBSITE: Update Webalog stats"
      # shellcheck disable=SC2086
      "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 - < "$LOGDATA"
    fi
    if [ -x "$MODLOGAN_BIN" ]; then
      debug "$WEBSITE: Update ModLogAn stats"
      run_modlogan "$LOGDATA"
    fi
    if [ -x "$AWSTATS_BIN" ]; then
      debug "$WEBSITE: Update AWStats stats"
      awstats_report "$LOGDATA"
    fi
    gzip -f9 "$LOGDATA"
  fi

  if [ "$stamp" = "init" ]; then
    debug "Execute $WEBSITE POST_INIT"
    post_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_INIT."
  fi
  if [ "$stamp" = "update" ]; then
    debug "Execute $WEBSITE POST_UPDATE"
    post_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_UPDATE."
  fi
done