summaryrefslogtreecommitdiff
path: root/localwebstats
blob: 3f27ada5087cf33561b5c2e90464622104034ca4 (plain)
  1. #!/bin/bash
  2. # /etc/cron.daily/localstats: Webstats maintenance script
  3. # Written by Jonas Smedegaard <dr@jones.dk>
  4. # halt on errors
  5. #set -e
  6. function usage() {
  7. echo "Usage: $(basename $0) init|reset|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
  8. echo " If no website is given, all are attempted"
  9. echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  10. exit 1
  11. }
  12. function exit1() {
  13. echo "Error: $1"
  14. echo "Exiting..."
  15. exit 1
  16. }
  17. # automagically configure when run from cron dirs
  18. case $(dirname $0) in
  19. /etc/cron.daily)
  20. stamp=update
  21. ;;
  22. /etc/cron.weekly)
  23. stamp=ignore
  24. ;;
  25. /etc/cron.monthly)
  26. stamp=ignore
  27. ;;
  28. *)
  29. stamp=$1
  30. shift || usage
  31. ;;
  32. esac
  33. case "$stamp" in
  34. init|reset|update|prelogrotate|postlogrotate|ignore)
  35. ;;
  36. *)
  37. usage
  38. ;;
  39. esac
  40. if [ "$stamp" = "ignore" ]; then
  41. if [ $DEBUG ]; then
  42. echo "Asked to ignore - exiting silently..."
  43. fi
  44. exit 0
  45. fi
  46. WEBROOT=/var/www
  47. LOGROOT=/var/log/apache
  48. function statsdir() { echo $1/VIRTUAL/stats.$(dnsdomainname)/www/$2; }
  49. function webdirs() { echo `find /var/log/apache -type d -print`; }
  50. #function webdirs() { for dir in $(find $LOGROOT -type d -print); do echo $dir; done; }
  51. #function logdir() { echo $LOGROOT/$WEBSITE; }
  52. #function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
  53. function logfiles() { ls $1/$2/????.??.?? | grep -v '\.00$'; }
  54. function logfilesgz() { ls $1/$2/????.??.??.gz | grep -v '\.00\.gz$'; }
  55. function logfilesresolved() { ls $1/$2/????.??.00; }
  56. function logfilesresolvedgz() { ls $1/$2/????.??.00.gz; }
  57. function host() { cat $1/VIRTUAL/$2/hostname || exit1 "Unable to get hostname for virtual host."; }
  58. function domain() { cat $1/VIRTUAL/$2/domainname || exit1 "Unable to get domainname for virtual host."; }
  59. function analog_cfg() { echo /etc/analog_$2.conf; }
  60. function webalizer_cfg() { echo /etc/webalizer_$2.conf; }
  61. # The above can be overridden in /etc/local/www
  62. . /etc/local/www || exit1 "Unable to read local preferences"
  63. # variables and functions too boring to be configurable
  64. JDRESOLVE_BIN="/usr/bin/jdresolve"
  65. JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
  66. #JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  67. JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  68. JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
  69. LOGRESOLVE_BIN="/usr/sbin/logresolve"
  70. ANALOG_BIN="/usr/bin/analog"
  71. WEBALIZER_BIN="/usr/bin/webalizer"
  72. AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
  73. function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
  74. # Webiste/independent checks
  75. test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist"
  76. test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist"
  77. if [ -x $JDRESOLVE_BIN ]; then
  78. touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
  79. fi
  80. # Generate stats for websites from stdin or all default sites
  81. WEBSITES=$@
  82. if [ "$WEBSITES" = "" ]; then
  83. WEBSITES=$(webdirs)
  84. fi
  85. for WEBSITE in $WEBSITES; do
  86. STATSDIR=$(statsdir $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
  87. # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
  88. ANALOG_CFG=$(analog_cfg $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
  89. WEBALIZER_CFG=$(webalizer_cfg $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
  90. HOST=$(host $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
  91. DOMAIN=$(domain $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
  92. LOGFILES=$(logfiles $LOGROOT $WEBSITE)
  93. LOGFILESGZ=$(logfilesgz $LOGROOT $WEBSITE)
  94. LOGFILESRESOLVED=$(logfilesresolved $LOGROOT $WEBSITE)
  95. LOGFILESRESOLVEDGZ=$(logfilesresolvedgz $LOGROOT $WEBSITE)
  96. ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
  97. WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  98. AWSTATS_OPTIONS="-update"
  99. FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
  100. if [ $DEBUG ]; then
  101. echo "Making stats for $WEBSITE in $STATSDIR:"
  102. JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
  103. ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
  104. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
  105. AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  106. else
  107. JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n"
  108. ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
  109. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  110. AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
  111. fi
  112. if [ $stamp = "init" -o $stamp = "reset" ]; then
  113. [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
  114. rm -rf $STATSDIR
  115. mkdir $STATSDIR
  116. fi
  117. LOGDATARESOLVED="$STATSDIR/rawlog_resolved.txt"
  118. LOGDATA="$STATSDIR/rawlog_unresolved.txt"
  119. LOGDATATMP="$STATSDIR/rawlog_new.txt"
  120. touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
  121. if [ $stamp = "init" -o $stamp = "reset" ]; then
  122. for gzlog in $LOGFILESRESOLVEDGZ; do zcat $gzlog >> $LOGDATARESOLVED; done
  123. for log in $LOGFILESRESOLVED; do cat $log >> $LOGDATARESOLVED; done
  124. if [ -x $ANALOG_BIN ]; then
  125. if [ ! -f $ANALOG_CFG -o $stamp = "init" ]; then
  126. [ $DEBUG ] && echo "$WEBSITE: Create analog config"
  127. echo "\
  128. HOSTNAME $HOST.$DOMAIN
  129. HOSTURL http://$HOST.$DOMAIN/
  130. BASEURL http://$HOST.$DOMAIN
  131. LANGUAGE DANISH
  132. JPEGCHARTS ON
  133. "\
  134. > $ANALOG_CFG
  135. fi
  136. mkdir $STATSDIR/analog
  137. [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
  138. # $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"OUTFILE $STATSDIR/analog/index.html"
  139. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTPUT NONE"
  140. fi
  141. if [ -x $WEBALIZER_BIN ]; then
  142. if [ ! -f $WEBALIZER_CFG -o $stamp = "init" ]; then
  143. [ $DEBUG ] && echo "$WEBSITE: Create Webalizer config"
  144. echo "\
  145. HostName $HOST.$DOMAIN
  146. HideSite *$DOMAIN
  147. HideReferrer $DOMAIN/
  148. "\
  149. > $WEBALIZER_CFG
  150. fi
  151. mkdir $STATSDIR/webalizer
  152. [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
  153. cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
  154. fi
  155. if [ -x $AWSTATS_BIN ]; then
  156. if [ ! -f /etc/awstats/awstats.$WEBSITE.conf -o $stamp = "init" ]; then
  157. [ $DEBUG ] && echo "$WEBSITE: Create AWStats config"
  158. echo "\
  159. LogFile=\"$LOGDATARESOLVED\"
  160. LogFormat=4
  161. DNSLookup=0
  162. DirData=\"/var/cache/awstats\"
  163. AllowToUpdateStatsFromBrowser=0
  164. DirCgi=\"http://cgi.jones.dk/cgi-bin\"
  165. DirIcons=\"http://stats.jones.dk/awstats-icon\"
  166. SiteDomain=\"$FQDN_ESC\"
  167. HostAliases=\"$FQDN_ESC\"
  168. Lang=\"dk\"
  169. DirLang=\"/usr/share/awstats/lang\"
  170. DefaultFile=\"index.html\"
  171. SkipHosts=\"\"
  172. SkipFiles=\"\"
  173. ShowLinksOnUrl=1
  174. ShowFlagLinks=0
  175. "\
  176. >/etc/awstats/awstats.$WEBSITE.conf
  177. fi
  178. mkdir $STATSDIR/awstats
  179. [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
  180. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED
  181. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  182. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
  183. fi
  184. [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
  185. gzip -9 $LOGDATARESOLVED
  186. fi
  187. if [ -x $JDRESOLVE_BIN ]; then
  188. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
  189. for gzlog in $LOGFILESGZ; do zcat $gzlog | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA; done
  190. for log in $LOGFILES; do cat $log | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA; done
  191. jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
  192. elif [ -x $LOGRESOLVE_BIN ]; then
  193. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
  194. for gzlog in $LOGFILESGZ; do zcat $gzlog | $LOGRESOLVE_BIN >> $LOGDATA; done
  195. for log in $LOGFILES; do cat $log | $LOGRESOLVE_BIN >> $LOGDATA; done
  196. else
  197. [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
  198. for gzlog in $LOGFILESGZ; do zcat $gzlog >> $LOGDATA; done
  199. for log in $LOGFILES; do cat $log >> $LOGDATA}; done
  200. fi
  201. if [ -x $ANALOG_BIN ]; then
  202. [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
  203. # TODO: use incremental update
  204. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED.gz" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
  205. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATA" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
  206. fi
  207. if [ -x $WEBALIZER_BIN ]; then
  208. [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
  209. zcat -f $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
  210. fi
  211. if [ -x $AWSTATS_BIN ]; then
  212. [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
  213. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
  214. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  215. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  216. fi
  217. done