summaryrefslogtreecommitdiff
path: root/localwebstats
blob: 08ae8c7644bfacbf81313382dd744e5ba848ca52 (plain)
  1. #!/bin/bash
  2. #
  3. # /usr/local/sbin/localwebstats
  4. # Copyright 2001-2002 Jonas Smedegaard <dr@jones.dk>
  5. #
  6. # $Id: localwebstats,v 1.23 2002-11-04 22:36:16 jonas Exp $
  7. #
  8. # Webstats maintenance script
  9. #
  10. # Example config file (/etc/local/webstats.conf).
  11. #
  12. # --- CUT --- CUT --- CUT ---
  13. # #!/bin/sh
  14. #
  15. # # * Websites are in /home/<uid>/websites/<fqdn>
  16. # # * Apache httpd.conf has this line added:
  17. # # Include /etc/apache/vhosts.d
  18. # # * Apache use /usr/local/bin/parselog into /var/log/apache-vhosts/
  19. # # * /usr/lib/apache/suexec is recompiled using /usr/local/bin/make-suexec-for-home
  20. # # * Each webhost has apache config in /etc/apache/vhosts.d/<fqdn>
  21. # # * /etc/apache/vhosts.d/<fqdn> has hints about host- and domain-part of fqdn:
  22. # # # webstats: hostname: <hostname>
  23. # # # webstats: domainname: <domainname>
  24. #
  25. # #WEBALIZER_OPTIONS="-Q"
  26. #
  27. # LOGROOT='/var/log/apache-vhosts'
  28. # WEBROOT='/home'
  29. #
  30. # function statsdir() { echo /home/jonas/websites/stats.$(dnsdomainname)/$3; }
  31. # function webdirs() { find /etc/apache/vhosts.d/ -type f -exec egrep '^#\W*webstats:' '{}' -l ';' | xargs basename; }
  32. # function host() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*hostname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
  33. # function domain() { cat /etc/apache/vhosts.d/$3 | egrep '#\W*webstats:\W*domainname:\W*[\.[:alnum:]]+\W*$' | sed 's/^.*:\W\([\.[:alnum:]]\+\)\W*$/\1/'; }
  34. #
  35. # --- CUT --- CUT --- CUT ---
  36. #
  37. # TODO: Run as non-privileged user
  38. #
  39. # halt on errors
  40. set -e
  41. function usage() {
  42. echo "Usage: $(basename $0) init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
  43. echo " If no website is given, all are attempted"
  44. echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  45. exit 1
  46. }
  47. function exit1() {
  48. echo "Error: $1"
  49. echo "Exiting..."
  50. exit 1
  51. }
  52. # automagically configure when run from cron dirs
  53. case $(dirname $0) in
  54. /etc/cron.daily)
  55. stamp=update
  56. ;;
  57. /etc/cron.weekly)
  58. stamp=ignore
  59. ;;
  60. /etc/cron.monthly)
  61. stamp=ignore
  62. ;;
  63. *)
  64. stamp=$1
  65. shift || usage
  66. ;;
  67. esac
  68. case "$stamp" in
  69. init|update|prelogrotate|postlogrotate|ignore)
  70. ;;
  71. *)
  72. usage
  73. ;;
  74. esac
  75. if [ "$stamp" = "ignore" ]; then
  76. [ $DEBUG ] && echo "Asked to ignore - exiting silently..."
  77. exit 0
  78. fi
  79. ROOT=1
  80. user=www-data
  81. group=www-data
  82. LOGROOT=/var/log/apache-vhosts
  83. WEBROOT=/var/www
  84. # Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
  85. function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; }
  86. function webdirs() { find $1 -type d -mindepth 1 -maxdepth 1 | grep '\.*\.' | sed 's!$1!!'; }
  87. #function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
  88. function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow | sort); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow | sort); do cat $file; done; }
  89. function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow | sort); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow | sort); do cat $file; done; }
  90. function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
  91. function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
  92. function analog_cfg() { echo /etc/analog_$3.conf; }
  93. function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; }
  94. function webalizer_cfg() { echo /etc/webalizer_$3.conf; }
  95. function modlogan_cfg() { echo /etc/modlogan/modlogan_$3.conf; }
  96. function pre_init() { true; }
  97. function post_init() { true; }
  98. function pre_update() { true; }
  99. function post_update() { true; }
  100. # The above can be overridden
  101. LOCALCONFIG=/etc/local/webstats.conf
  102. . $LOCALCONFIG || echo "WARNING: Unable to read config file $LOCALCONFIG"
  103. # variables and functions too boring to be configurable
  104. JDRESOLVE_BIN="/usr/bin/jdresolve"
  105. JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
  106. #JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  107. # DB access doesn't work currently (no output...)
  108. #JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  109. JDRESOLVE_OPTIONS="-r"
  110. JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
  111. LOGRESOLVE_BIN="/usr/sbin/logresolve"
  112. ANALOG_BIN="/usr/bin/analog"
  113. RMAGIC_BIN="/usr/bin/rmagic"
  114. WEBALIZER_BIN="/usr/bin/webalizer"
  115. MODLOGAN_BIN="/usr/bin/modlogan"
  116. AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
  117. function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
  118. # Webiste/independent checks
  119. test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist"
  120. test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist"
  121. if [ -x $JDRESOLVE_BIN ]; then
  122. touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
  123. fi
  124. # Generate stats for websites from stdin or all default sites
  125. WEBSITES=$@
  126. if [ "$WEBSITES" = "" ]; then
  127. WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)
  128. fi
  129. for WEBSITE in $WEBSITES; do
  130. STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
  131. # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
  132. ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
  133. RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
  134. WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
  135. MODLOGAN_CFG=$(modlogan_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve MODLOGAN_CFG."
  136. if [ $stamp = "init" ]; then
  137. [ $DEBUG ] && echo "Execute $WEBSITE PRE_INIT"
  138. pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
  139. fi
  140. if [ $stamp = "update" ]; then
  141. [ $DEBUG ] && echo "Execute $WEBSITE PRE_UPDATE"
  142. pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
  143. fi
  144. HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
  145. DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
  146. ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
  147. RMAGIC_OPTIONS=""
  148. WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  149. MODLOGAN_OPTIONS="-c $MODLOGAN_CFG"
  150. AWSTATS_OPTIONS="-update"
  151. FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
  152. if [ $DEBUG ]; then
  153. echo "Making stats for $WEBSITE in $STATSDIR:"
  154. JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
  155. ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
  156. RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
  157. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
  158. MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
  159. AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  160. else
  161. JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n"
  162. ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
  163. RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
  164. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  165. MODLOGAN_OPTIONS="$MODLOGAN_OPTIONS"
  166. AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
  167. fi
  168. if [ $stamp = "init" ]; then
  169. [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
  170. rm -rf $STATSDIR
  171. mkdir $STATSDIR
  172. fi
  173. LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
  174. LOGDATA="$STATSDIR/rawlog_new.txt"
  175. LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
  176. touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
  177. #FIXME run init if folders doesn't exist
  178. if [ $stamp = "init" ]; then
  179. logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
  180. if [ -x $ANALOG_BIN ]; then
  181. [ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
  182. echo "\
  183. # NB! This file is automatically generated. Do not edit directly!
  184. # Instead, put additions/overrides in $ANALOG_CFG.local
  185. HOSTNAME $HOST.$DOMAIN
  186. HOSTURL http://$HOST.$DOMAIN/
  187. BASEURL http://$HOST.$DOMAIN
  188. LANGUAGE DANISH
  189. "\
  190. > $ANALOG_CFG
  191. [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
  192. [ -d $STATSDIR/analog ] || mkdir $STATSDIR/analog
  193. [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
  194. rm -f $STATSDIR/analog/cache.data
  195. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
  196. fi
  197. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  198. [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
  199. echo "\
  200. # NB! This file is automatically generated. Do not edit directly!
  201. # Instead, put additions/overrides in $RMAGIC_CFG.local
  202. [statistics]
  203. File_In = $STATSDIR/rmagic/report.dat
  204. Frame_File_Out = $STATSDIR/rmagic/index.html
  205. Language = en
  206. [reports]
  207. File_Out = $STATSDIR/rmagic/
  208. [QUICK]
  209. Rows = ALL
  210. [navigation]
  211. File_Out = navfile.html
  212. "\
  213. > $RMAGIC_CFG
  214. [ -s $RMAGIC_CFG.local ] && $RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
  215. [ -d $STATSDIR/rmagic ] || mkdir $STATSDIR/rmagic
  216. [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
  217. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  218. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  219. fi
  220. if [ -x $WEBALIZER_BIN ]; then
  221. [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
  222. echo "\
  223. # NB! This file is automatically generated. Do not edit directly!
  224. # Instead, put additions/overrides in $WEBALIZER_CFG.local
  225. HostName $HOST.$DOMAIN
  226. HideSite *$DOMAIN
  227. HideReferrer $DOMAIN/
  228. "\
  229. > $WEBALIZER_CFG
  230. [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
  231. [ -d $STATSDIR/webalizer ] || mkdir $STATSDIR/webalizer
  232. [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
  233. cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
  234. fi
  235. if [ -x $MODLOGAN_BIN ]; then
  236. [ $DEBUG ] && echo "$WEBSITE: Create/update ModLogAn config"
  237. echo "\
  238. # NB! This file is automatically generated. Do not edit directly!
  239. # Instead, put additions/overrides in $MODLOGAN_CFG.local
  240. [global]
  241. includepath = /etc/modlogan
  242. include = modlogan.def.conf,global
  243. loadplugin = input_clf
  244. loadplugin = output_modlogan
  245. loadplugin = processor_web
  246. outputdir=$STATSDIR/modlogan
  247. incremental = 0
  248. debug_level = 0
  249. [processor_web]
  250. include = modlogan.def.conf,processor_web
  251. searchengines = match.searchengines.conf
  252. debug_searchengines = 0
  253. hidereferrer = \"^http://$HOST.$DOMAIN\"
  254. [output_modlogan]
  255. include = modlogan.def.conf,output_modlogan
  256. hostname = http://$HOST.$DOMAIN
  257. [input_clf]
  258. include = match.os.conf,matchos
  259. include = match.ua.conf,matchua
  260. inputfile = -
  261. "\
  262. > $MODLOGAN_CFG
  263. [ -s $MODLOGAN_CFG.local ] && cat $MODLOGAN_CFG.local >> $MODLOGAN_CFG
  264. [ -d $STATSDIR/modlogan ] || mkdir $STATSDIR/modlogan
  265. [ $ROOT ] && chown $user:$group $STATSDIR/modlogan
  266. [ $DEBUG ] && echo "$WEBSITE: Create initial ModLogAn stats"
  267. if [ $ROOT ]; then
  268. cat $LOGDATARESOLVED | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" || true
  269. else
  270. cat $LOGDATARESOLVED | $MODLOGAN_BIN $MODLOGAN_OPTIONS || true
  271. fi
  272. fi
  273. if [ -x $AWSTATS_BIN ]; then
  274. [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
  275. echo "\
  276. # NB! This file is automatically generated. Do not edit directly!
  277. # Instead, put additions/overrides in $AWSTATS_CFG.local
  278. LogFile=\"$LOGDATATMP\"
  279. LogFormat=1
  280. DNSLookup=0
  281. DirData=\"$STATSDIR/awstats\"
  282. AllowToUpdateStatsFromBrowser=0
  283. DirCgi=\"http://cgi.jones.dk/cgi-bin\"
  284. DirIcons=\"http://stats.jones.dk/awstats-icon\"
  285. SiteDomain=\"$FQDN_ESC\"
  286. HostAliases=\"$FQDN_ESC\"
  287. Lang=\"dk\"
  288. DirLang=\"/usr/share/awstats/lang\"
  289. DefaultFile=\"index.html\"
  290. SkipHosts=\"\"
  291. SkipFiles=\"\"
  292. ShowLinksOnUrl=1
  293. ShowFlagLinks=0
  294. "\
  295. >/etc/awstats/awstats.$WEBSITE.conf
  296. [ -d $STATSDIR/awstats ] || mkdir $STATSDIR/awstats
  297. [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
  298. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED
  299. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  300. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  301. fi
  302. [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
  303. gzip -9 $LOGDATARESOLVED
  304. fi
  305. if [ -x $JDRESOLVE_BIN ]; then
  306. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
  307. # DB access doesn't work currently (no output...)
  308. # logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  309. # jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
  310. logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  311. elif [ -x $LOGRESOLVE_BIN ]; then
  312. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
  313. logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
  314. else
  315. [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
  316. logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
  317. fi
  318. if [ -s $LOGDATA ]; then
  319. if [ -x $ANALOG_BIN ]; then
  320. [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
  321. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
  322. fi
  323. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  324. [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
  325. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  326. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  327. fi
  328. if [ -x $WEBALIZER_BIN ]; then
  329. [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
  330. cat $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
  331. fi
  332. if [ -x $MODLOGAN_BIN ]; then
  333. [ $DEBUG ] && echo "$WEBSITE: Update ModLogAn stats"
  334. if [ $ROOT ]; then
  335. cat $LOGDATA | su $user -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" >/dev/null
  336. else
  337. cat $LOGDATA | $MODLOGAN_BIN $MODLOGAN_OPTIONS >/dev/null
  338. fi
  339. fi
  340. if [ -x $AWSTATS_BIN ]; then
  341. [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
  342. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
  343. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  344. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  345. fi
  346. gzip -f9 $LOGDATA
  347. fi
  348. if [ $stamp = "init" ]; then
  349. [ $DEBUG ] && echo "Execute $WEBSITE POST_INIT"
  350. post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
  351. fi
  352. if [ $stamp = "update" ]; then
  353. [ $DEBUG ] && echo "Execute $WEBSITE POST_UPDATE"
  354. post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
  355. fi
  356. done