summaryrefslogtreecommitdiff
path: root/localwebstats
blob: 22bc9756f22cc718f1d3f4e07c8b62ce0cd0d500 (plain)
  1. #!/bin/bash
  2. #
  3. # /usr/local/sbin/localwebstats
  4. # Copyright 2001-2002 Jonas Smedegaard <dr@jones.dk>
  5. #
  6. # $Id: localwebstats,v 1.21 2002-09-12 17:34:37 jonas Exp $
  7. #
  8. # Webstats maintenance script
  9. #
  10. # Example config file (/etc/local/webstats.conf).
  11. #
  12. # --- CUT --- CUT --- CUT ---
  13. # #!/bin/sh
  14. #
  15. # # * Websites are in /home/<uid>/websites/<fqdn>
  16. # # * Apache httpd.conf has this line added:
  17. # # Include /etc/apache/vhosts.d
  18. # # * /usr/lib/apache/suexec is recompiled using /usr/local/bin/make-suexec-for-home
  19. # # * Each webhost has apache config in /etc/apache/vhosts.d/<fqdn>
  20. # # * /etc/apache/vhosts.d/<fqdn> has hints about host- and domain-part of fqdn:
  21. # # # webstats: hostname: <hostname>
  22. # # # webstats: domainname: <domainname>
  23. #
  24. # #WEBALIZER_OPTIONS="-Q"
  25. #
  26. # LOGROOT='/var/log/apache-vhosts'
  27. # WEBROOT='/home'
  28. #
  29. # function statsdir() { echo /home/jonas/websites/stats.$(dnsdomainname)/$3; }
  30. # function webdirs() { find /etc/apache/vhosts.d/ -type f -exec egrep '^#\W*webstats:' '{}' -l ';' | xargs basename; }
  31. # function host() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*hostname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
  32. # function domain() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*domainname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
  33. #
  34. # --- CUT --- CUT --- CUT ---
  35. #
  36. # halt on errors
  37. set -e
  38. function usage() {
  39. echo "Usage: $(basename $0) init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
  40. echo " If no website is given, all are attempted"
  41. echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  42. exit 1
  43. }
  44. function exit1() {
  45. echo "Error: $1"
  46. echo "Exiting..."
  47. exit 1
  48. }
  49. # automagically configure when run from cron dirs
  50. case $(dirname $0) in
  51. /etc/cron.daily)
  52. stamp=update
  53. ;;
  54. /etc/cron.weekly)
  55. stamp=ignore
  56. ;;
  57. /etc/cron.monthly)
  58. stamp=ignore
  59. ;;
  60. *)
  61. stamp=$1
  62. shift || usage
  63. ;;
  64. esac
  65. case "$stamp" in
  66. init|update|prelogrotate|postlogrotate|ignore)
  67. ;;
  68. *)
  69. usage
  70. ;;
  71. esac
  72. if [ "$stamp" = "ignore" ]; then
  73. [ $DEBUG ] && echo "Asked to ignore - exiting silently..."
  74. exit 0
  75. fi
  76. ROOT=1
  77. user=www-data
  78. group=www-data
  79. LOGROOT=/var/log/apache
  80. WEBROOT=/var/www
  81. # Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
  82. function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; }
  83. function webdirs() { find $1 -type d -mindepth 1 -maxdepth 1 | grep '\.*\.' | sed 's!$1!!'; }
  84. #function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
  85. function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow | sort); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow | sort); do cat $file; done; }
  86. function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow | sort); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow | sort); do cat $file; done; }
  87. function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
  88. function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
  89. function analog_cfg() { echo /etc/analog_$3.conf; }
  90. function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; }
  91. function webalizer_cfg() { echo /etc/webalizer_$3.conf; }
  92. function modlogan_cfg() { echo /etc/modlogan/modlogan_$3.conf; }
  93. function pre_init() { true; }
  94. function post_init() { true; }
  95. function pre_update() { true; }
  96. function post_update() { true; }
  97. # The above can be overridden
  98. LOCALCONFIG=/etc/local/webstats.conf
  99. . $LOCALCONFIG || echo "WARNING: Unable to read config file $LOCALCONFIG"
  100. # variables and functions too boring to be configurable
  101. JDRESOLVE_BIN="/usr/bin/jdresolve"
  102. JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
  103. #JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  104. # DB access doesn't work currently (no output...)
  105. #JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  106. JDRESOLVE_OPTIONS="-r"
  107. JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
  108. LOGRESOLVE_BIN="/usr/sbin/logresolve"
  109. ANALOG_BIN="/usr/bin/analog"
  110. RMAGIC_BIN="/usr/bin/rmagic"
  111. WEBALIZER_BIN="/usr/bin/webalizer"
  112. MODLOGAN_BIN="/usr/bin/modlogan"
  113. AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
  114. function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
  115. # Webiste/independent checks
  116. test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist"
  117. test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist"
  118. if [ -x $JDRESOLVE_BIN ]; then
  119. touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
  120. fi
  121. # Generate stats for websites from stdin or all default sites
  122. WEBSITES=$@
  123. if [ "$WEBSITES" = "" ]; then
  124. WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)
  125. fi
  126. for WEBSITE in $WEBSITES; do
  127. STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
  128. # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
  129. ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
  130. RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
  131. WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
  132. MODLOGAN_CFG=$(modlogan_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve MODLOGAN_CFG."
  133. if [ $stamp = "init" ]; then
  134. [ $DEBUG ] && echo "Execute $WEBSITE PRE_INIT"
  135. pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
  136. fi
  137. if [ $stamp = "update" ]; then
  138. [ $DEBUG ] && echo "Execute $WEBSITE PRE_UPDATE"
  139. pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
  140. fi
  141. HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
  142. DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
  143. ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
  144. RMAGIC_OPTIONS=""
  145. WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  146. MODLOGAN_OPTIONS="-c $MODLOGAN_CFG"
  147. AWSTATS_OPTIONS="-update"
  148. FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
  149. if [ $DEBUG ]; then
  150. echo "Making stats for $WEBSITE in $STATSDIR:"
  151. JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
  152. ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
  153. RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
  154. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
  155. MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
  156. AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  157. else
  158. JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n"
  159. ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
  160. RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
  161. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  162. MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
  163. AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
  164. fi
  165. if [ $stamp = "init" ]; then
  166. [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
  167. rm -rf $STATSDIR
  168. mkdir $STATSDIR
  169. fi
  170. LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
  171. LOGDATA="$STATSDIR/rawlog_new.txt"
  172. LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
  173. touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
  174. #FIXME run init if folders doesn't exist
  175. if [ $stamp = "init" ]; then
  176. logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
  177. if [ -x $ANALOG_BIN ]; then
  178. [ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
  179. echo "\
  180. # NB! This file is automatically generated. Do not edit directly!
  181. # Instead, put additions/overrides in $ANALOG_CFG.local
  182. HOSTNAME $HOST.$DOMAIN
  183. HOSTURL http://$HOST.$DOMAIN/
  184. BASEURL http://$HOST.$DOMAIN
  185. LANGUAGE DANISH
  186. "\
  187. > $ANALOG_CFG
  188. [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
  189. [ -d $STATSDIR/analog ] || mkdir $STATSDIR/analog
  190. [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
  191. rm -f $STATSDIR/analog/cache.data
  192. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
  193. fi
  194. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  195. [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
  196. echo "\
  197. # NB! This file is automatically generated. Do not edit directly!
  198. # Instead, put additions/overrides in $RMAGIC_CFG.local
  199. [statistics]
  200. File_In = $STATSDIR/rmagic/report.dat
  201. Frame_File_Out = $STATSDIR/rmagic/index.html
  202. Language = en
  203. [reports]
  204. File_Out = $STATSDIR/rmagic/
  205. [QUICK]
  206. Rows = ALL
  207. [navigation]
  208. File_Out = navfile.html
  209. "\
  210. > $RMAGIC_CFG
  211. [ -s $RMAGIC_CFG.local ] && $RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
  212. [ -d $STATSDIR/rmagic ] || mkdir $STATSDIR/rmagic
  213. [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
  214. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  215. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  216. fi
  217. if [ -x $WEBALIZER_BIN ]; then
  218. [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
  219. echo "\
  220. # NB! This file is automatically generated. Do not edit directly!
  221. # Instead, put additions/overrides in $WEBALIZER_CFG.local
  222. HostName $HOST.$DOMAIN
  223. HideSite *$DOMAIN
  224. HideReferrer $DOMAIN/
  225. "\
  226. > $WEBALIZER_CFG
  227. [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
  228. [ -d $STATSDIR/webalizer ] || mkdir $STATSDIR/webalizer
  229. [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
  230. cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
  231. fi
  232. if [ -x $MODLOGAN_BIN ]; then
  233. [ $DEBUG ] && echo "$WEBSITE: Create/update ModLogAn config"
  234. echo "\
  235. # NB! This file is automatically generated. Do not edit directly!
  236. # Instead, put additions/overrides in $MODLOGAN_CFG.local
  237. [global]
  238. includepath = /etc/modlogan
  239. include = modlogan.def.conf,global
  240. loadplugin = input_clf
  241. loadplugin = output_modlogan
  242. loadplugin = processor_web
  243. outputdir=$STATSDIR/modlogan
  244. incremental = 0
  245. debug_level = 0
  246. [processor_web]
  247. include = modlogan.def.conf,processor_web
  248. searchengines = match.searchengines.conf
  249. debug_searchengines = 0
  250. hidereferrer = \"^http://$HOST.$DOMAIN\"
  251. [output_modlogan]
  252. include = modlogan.def.conf,output_modlogan
  253. hostname = http://$HOST.$DOMAIN
  254. [input_clf]
  255. include = match.os.conf,matchos
  256. include = match.ua.conf,matchua
  257. inputfile = -
  258. "\
  259. > $MODLOGAN_CFG
  260. [ -s $MODLOGAN_CFG.local ] && cat $MODLOGAN_CFG.local >> $MODLOGAN_CFG
  261. [ -d $STATSDIR/modlogan ] || mkdir $STATSDIR/modlogan
  262. [ $ROOT ] && chown $user:$group $STATSDIR/modlogan
  263. [ $DEBUG ] && echo "$WEBSITE: Create initial ModLogAn stats"
  264. if [ $ROOT ]; then
  265. cat $LOGDATARESOLVED | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" || true
  266. else
  267. cat $LOGDATARESOLVED | $MODLOGAN_BIN $MODLOGAN_OPTIONS || true
  268. fi
  269. fi
  270. if [ -x $AWSTATS_BIN ]; then
  271. [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
  272. echo "\
  273. # NB! This file is automatically generated. Do not edit directly!
  274. # Instead, put additions/overrides in $AWSTATS_CFG.local
  275. LogFile=\"$LOGDATATMP\"
  276. LogFormat=1
  277. DNSLookup=0
  278. DirData=\"$STATSDIR/awstats\"
  279. AllowToUpdateStatsFromBrowser=0
  280. DirCgi=\"http://cgi.jones.dk/cgi-bin\"
  281. DirIcons=\"http://stats.jones.dk/awstats-icon\"
  282. SiteDomain=\"$FQDN_ESC\"
  283. HostAliases=\"$FQDN_ESC\"
  284. Lang=\"dk\"
  285. DirLang=\"/usr/share/awstats/lang\"
  286. DefaultFile=\"index.html\"
  287. SkipHosts=\"\"
  288. SkipFiles=\"\"
  289. ShowLinksOnUrl=1
  290. ShowFlagLinks=0
  291. "\
  292. >/etc/awstats/awstats.$WEBSITE.conf
  293. [ -d $STATSDIR/awstats ] || mkdir $STATSDIR/awstats
  294. [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
  295. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED
  296. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  297. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  298. fi
  299. [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
  300. gzip -9 $LOGDATARESOLVED
  301. fi
  302. if [ -x $JDRESOLVE_BIN ]; then
  303. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
  304. # DB access doesn't work currently (no output...)
  305. # logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  306. # jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
  307. logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  308. elif [ -x $LOGRESOLVE_BIN ]; then
  309. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
  310. logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
  311. else
  312. [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
  313. logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
  314. fi
  315. if [ -s $LOGDATA ]; then
  316. if [ -x $ANALOG_BIN ]; then
  317. [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
  318. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
  319. fi
  320. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  321. [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
  322. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  323. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  324. fi
  325. if [ -x $WEBALIZER_BIN ]; then
  326. [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
  327. cat $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
  328. fi
  329. if [ -x $MODLOGAN_BIN ]; then
  330. [ $DEBUG ] && echo "$WEBSITE: Update ModLogAn stats"
  331. if [ $ROOT ]; then
  332. cat $LOGDATA | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" >/dev/null
  333. else
  334. cat $LOGDATA | $MODLOGAN_BIN $MODLOGAN_OPTIONS >/dev/null
  335. fi
  336. fi
  337. if [ -x $AWSTATS_BIN ]; then
  338. [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
  339. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
  340. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  341. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  342. fi
  343. gzip -f9 $LOGDATA
  344. fi
  345. if [ $stamp = "init" ]; then
  346. [ $DEBUG ] && echo "Execute $WEBSITE POST_INIT"
  347. post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
  348. fi
  349. if [ $stamp = "update" ]; then
  350. [ $DEBUG ] && echo "Execute $WEBSITE POST_UPDATE"
  351. post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
  352. fi
  353. done