summaryrefslogtreecommitdiff
path: root/localwebstats
blob: 437b0760b993844767c20b2f8e914f8e26b70b94 (plain)
  1. #!/bin/bash
  # /etc/cron.daily/localstats: Webstats maintenance script
  # Written by Jonas Smedegaard <dr@jones.dk>

  # Abort the run as soon as an unchecked command returns a non-zero status.
  set -o errexit
  # Print the command-line synopsis on stdout and terminate with status 1.
  # Takes no arguments; the program name shown is the basename of $0.
  usage() {
    local prog
    # $0 is quoted so an invocation path containing whitespace is not
    # word-split; fall back to the raw value should basename fail.
    prog=$(basename -- "$0") || prog=$0
    printf '%s\n' \
      "Usage: $prog init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]" \
      " If no website is given, all are attempted" \
      " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
    exit 1
  }
  # Report a fatal error and terminate with status 1.
  # Arguments: $1 - human-readable description of the failure
  # Outputs:   "Error: <description>" and "Exiting..." on stderr.  stderr is
  #            used so the message stays visible when the caller runs inside
  #            a command substitution such as HOST=$(host ...), where stdout
  #            is captured.
  exit1() {
    printf '%s\n' "Error: $1" "Exiting..." >&2
    exit 1
  }
  # Decide which action ("stamp") to perform.  When started from one of the
  # cron directories the action follows from that directory; otherwise it is
  # the first command-line argument, which is then shifted away.
  case $(dirname $0) in
    /etc/cron.daily)
      stamp=update
      ;;
    /etc/cron.weekly | /etc/cron.monthly)
      stamp=ignore
      ;;
    *)
      stamp=$1
      shift || usage
      ;;
  esac

  # Unknown actions get the usage text; "ignore" ends the run successfully.
  case "$stamp" in
    ignore)
      if [ $DEBUG ]; then
        echo "Asked to ignore - exiting silently..."
      fi
      exit 0
      ;;
    init | update | prelogrotate | postlogrotate)
      ;;
    *)
      usage
      ;;
  esac
  # Default settings; the local config file sourced further down may
  # replace any of them.
  # ROOT: when non-empty, the ModLogAn steps chown their output directory to
  # $user:$group and run the analyser through "su $user".
  ROOT="1"
  user="www-data"
  group="www-data"
  # LOGROOT holds one log directory per website; WEBROOT is the base of the
  # VIRTUAL/ tree used for hostnames and the statistics output.
  LOGROOT="/var/log/apache"
  WEBROOT="/var/www"
  # The helper functions below share one calling convention:
  #   $1 = LOGROOT, $2 = WEBROOT, $3 = WEBSITE

  # Print the directory that receives the generated statistics of a website:
  # $2/VIRTUAL/stats.<output of dnsdomainname>/www/$3
  statsdir() {
    echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3
  }
  # List the websites that have a log directory.
  # Arguments: $1 - LOGROOT ($2 and $3 are accepted for the shared calling
  #                 convention but are not used)
  # Outputs:   one line per directory directly below $1 whose own name
  #            contains a dot; only the directory name is printed, so the
  #            result can be used as the WEBSITE argument of the other
  #            helpers (which build paths as $1/$3).
  webdirs() {
    local dir
    # -mindepth/-maxdepth come first because GNU find treats them as global
    # options.  The dot is matched against the directory name only, so dots
    # elsewhere in the LOGROOT path do not let every directory through.
    find "$1" -mindepth 1 -maxdepth 1 -type d -name '*.*' \
      | while IFS= read -r dir; do
        printf '%s\n' "${dir##*/}"
      done
  }
  52. #function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
  # Print the logs named ????.??.00(.gz) found directly inside $1/$3.  The
  # main loop stores this output as its "resolved" log data and feeds it to
  # the analysers without any DNS lookup.
  # Arguments: $1 - LOGROOT, $3 - WEBSITE ($2 is unused)
  # Outputs:   the decompressed *.gz files in sorted name order, followed by
  #            the plain files in sorted name order.
  logcontentresolved() {
    local logdir=$1/$3
    local logfile
    # -mindepth/-maxdepth/-follow are given before the tests; GNU find warns
    # on every run when global options come after them.  Reading the file
    # list line by line keeps paths containing blanks intact.
    while IFS= read -r logfile; do
      zcat "$logfile"
    done < <(
      find "$logdir" -mindepth 1 -maxdepth 1 -follow \
        -name '????.??.00.gz' -type f | sort
    )
    while IFS= read -r logfile; do
      cat "$logfile"
    done < <(
      find "$logdir" -mindepth 1 -maxdepth 1 -follow \
        -name '????.??.00' -type f | sort
    )
  }
  # Print the logs named ????.??.??(.gz) found directly inside $1/$3, leaving
  # out the files whose name ends in 00 (those are handled by
  # logcontentresolved).  The main loop passes this output through a DNS
  # resolver when one is installed.
  # Arguments: $1 - LOGROOT, $3 - WEBSITE ($2 is unused)
  # Outputs:   the decompressed *.gz files in sorted name order, followed by
  #            the plain files in sorted name order.
  logcontent() {
    local logdir=$1/$3
    local logfile
    # -mindepth/-maxdepth/-follow are given before the tests; GNU find warns
    # on every run when global options come after them.  Reading the file
    # list line by line keeps paths containing blanks intact.
    while IFS= read -r logfile; do
      zcat "$logfile"
    done < <(
      find "$logdir" -mindepth 1 -maxdepth 1 -follow \
        -name '????.??.??.gz' ! -name '*00.gz' -type f | sort
    )
    while IFS= read -r logfile; do
      cat "$logfile"
    done < <(
      find "$logdir" -mindepth 1 -maxdepth 1 -follow \
        -name '????.??.??' ! -name '*00' -type f | sort
    )
  }
  # Print the content of $2/VIRTUAL/$3/hostname, the host part recorded for
  # website $3.  Terminates through exit1 when the file cannot be read.
  host() {
    if ! cat $2/VIRTUAL/$3/hostname; then
      exit1 "Unable to get hostname for virtual host."
    fi
  }
  # Print the content of $2/VIRTUAL/$3/domainname, the domain part recorded
  # for website $3.  Terminates through exit1 when the file cannot be read.
  domain() {
    if ! cat $2/VIRTUAL/$3/domainname; then
      exit1 "Unable to get domainname for virtual host."
    fi
  }
  # Print the path of the analog configuration file written for website $3.
  analog_cfg() {
    echo /etc/analog_${3}.conf
  }
  # Print the path of the Report Magic configuration file written for
  # website $3.
  rmagic_cfg() {
    echo /etc/rmagic/rmagic_${3}.conf
  }
  # Print the path of the Webalizer configuration file written for
  # website $3.
  webalizer_cfg() {
    echo /etc/webalizer_${3}.conf
  }
  # Print the path of the ModLogAn configuration file written for
  # website $3.
  modlogan_cfg() {
    echo /etc/modlogan/modlogan_${3}.conf
  }
  # Hooks called by the main loop before and after the "init" and "update"
  # actions, each with LOGROOT, WEBROOT and WEBSITE as arguments.  The
  # defaults do nothing and succeed.
  pre_init() { :; }
  post_init() { :; }
  pre_update() { :; }
  post_update() { :; }
  # Everything defined up to this point can be overridden by the local
  # config file.
  # Site-specific overrides are sourced from this file; the run is aborted
  # when sourcing it fails.
  LOCALCONFIG=/etc/local/webstats.conf
  if ! . $LOCALCONFIG; then
    exit1 "Unable to read local config file $LOCALCONFIG"
  fi
  # Settings that are deliberately not configurable: they are assigned after
  # the local config file has been sourced.
  # A tool is only used when its binary below is executable.
  JDRESOLVE_BIN=/usr/bin/jdresolve
  JDRESOLVE_DB=/var/cache/jdresolve/hosts.db
  #JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  # DB access doesn't work currently (no output...)
  #JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  JDRESOLVE_OPTIONS=-r
  JDRESOLVE_MERGE_OPTIONS="--mergedb --database=${JDRESOLVE_DB}"
  LOGRESOLVE_BIN=/usr/sbin/logresolve
  ANALOG_BIN=/usr/bin/analog
  RMAGIC_BIN=/usr/bin/rmagic
  WEBALIZER_BIN=/usr/bin/webalizer
  MODLOGAN_BIN=/usr/bin/modlogan
  AWSTATS_BIN=/usr/lib/cgi-bin/awstats.pl
  # Point the LogFile directive of an AWStats configuration at another file.
  # Arguments: $1 - AWStats configuration file, rewritten in place via $1.tmp
  #            $2 - path stored after "LogFile=" (written without quotes)
  # Returns:   non-zero when the rewrite fails; $1 is then left unchanged.
  awstats_setlog() {
    local conf=$1
    local newlog
    # Escape the characters that are special in a sed replacement delimited
    # by "!", so that any path is stored literally.
    newlog=$(printf '%s\n' "$2" | sed -e 's/[\\&!]/\\&/g') || return
    # Move the temporary file into place only after sed succeeded, so a
    # failed rewrite cannot install a truncated configuration.
    sed -e "s!^\(LogFile=\).*\$!\\1${newlog}!" "$conf" > "$conf.tmp" \
      && mv "$conf.tmp" "$conf"
  }
  # Checks that do not depend on a particular website
  [ -d $WEBROOT ] || exit1 "Webroot \"$WEBROOT\" doesn't exist"
  [ -d $LOGROOT ] || exit1 "Logroot \"$LOGROOT\" doesn't exist"
  # The jdresolve database only has to be writable when jdresolve is usable.
  if [ -x $JDRESOLVE_BIN ]; then
    touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
  fi

  # Websites to process: the remaining command-line arguments, or whatever
  # webdirs reports when none were given.
  WEBSITES=$@
  if [ -z "$WEBSITES" ]; then
    WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)
  fi
  # Print a progress message on stdout, but only when DEBUG is non-empty.
  log_debug() {
    if [ -n "$DEBUG" ]; then
      echo "$1"
    fi
  }

  # Main loop: build ("init") or refresh the statistics of every website in
  # $WEBSITES.  For each site the per-site paths and tool options are
  # computed, the configs and initial reports are (re)created on "init", new
  # log data is merged (DNS-resolved when a resolver is installed) and every
  # installed analyser is run on it.
  for WEBSITE in $WEBSITES; do
    # Per-site paths come from the helper functions, which the local config
    # file may have replaced.
    STATSDIR=$(statsdir "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve STATSDIR."
    # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
    ANALOG_CFG=$(analog_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve ANALOG_CFG."
    RMAGIC_CFG=$(rmagic_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve RMAGIC_CFG."
    WEBALIZER_CFG=$(webalizer_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve WEBALIZER_CFG."
    MODLOGAN_CFG=$(modlogan_cfg "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to resolve MODLOGAN_CFG."
    # Used for every access to the AWStats config; the file name matches the
    # -config=$WEBSITE argument passed to AWStats below.
    AWSTATS_CFG="/etc/awstats/awstats.$WEBSITE.conf"

    if [ "$stamp" = "init" ]; then
      log_debug "Execute $WEBSITE PRE_INIT"
      pre_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_INIT."
    fi
    if [ "$stamp" = "update" ]; then
      log_debug "Execute $WEBSITE PRE_UPDATE"
      pre_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing PRE_UPDATE."
    fi

    HOST=$(host "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get hostname for virtual host."
    DOMAIN=$(domain "$LOGROOT" "$WEBROOT" "$WEBSITE") || exit1 "Unable to get domainname for virtual host."

    # The *_OPTIONS strings are expanded unquoted further down on purpose:
    # each holds several command-line words.
    ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
    RMAGIC_OPTIONS=""
    WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
    MODLOGAN_OPTIONS="-c $MODLOGAN_CFG"
    AWSTATS_OPTIONS="-update"
    FQDN_ESC=$(echo "$HOST.$DOMAIN" | sed -e 's/\./\\./g') # needed for awstats config

    # The jdresolve options are derived into a per-site variable so the base
    # options (-r) are kept and nothing accumulates from one website to the
    # next.
    if [ -n "$DEBUG" ]; then
      echo "Making stats for $WEBSITE in $STATSDIR:"
      SITE_JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
      ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
      WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
      AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
    else
      SITE_JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -n"
      ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
      RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
      WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
    fi

    if [ "$stamp" = "init" ]; then
      log_debug "$WEBSITE: Purge STATSDIR"
      # ${STATSDIR:?} aborts rather than running "rm -rf" on an empty path.
      rm -rf "${STATSDIR:?}"
      mkdir "$STATSDIR"
    fi

    LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
    LOGDATA="$STATSDIR/rawlog_new.txt"
    LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
    touch "$LOGDATARESOLVED" "$LOGDATA" "$LOGDATATMP" || exit1 "Couldn't touch LOGDATA files."

    #FIXME run init if folders doesn't exist
    if [ "$stamp" = "init" ]; then
      logcontentresolved "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATARESOLVED"

      if [ -x "$ANALOG_BIN" ]; then
        log_debug "$WEBSITE: Create/update analog config"
        printf '%s\n' \
          "# NB! This file is automatically generated. Do not edit directly!" \
          "# Instead, put additions/overrides in $ANALOG_CFG.local" \
          "HOSTNAME $HOST.$DOMAIN" \
          "HOSTURL http://$HOST.$DOMAIN/" \
          "BASEURL http://$HOST.$DOMAIN" \
          "LANGUAGE DANISH" \
          "" \
          > "$ANALOG_CFG"
        if [ -s "$ANALOG_CFG.local" ]; then
          cat "$ANALOG_CFG.local" >> "$ANALOG_CFG"
        fi
        mkdir -p "$STATSDIR/analog"
        log_debug "$WEBSITE: Create initial analog stats"
        rm -f "$STATSDIR/analog/cache.data"
        "$ANALOG_BIN" $ANALOG_OPTIONS \
          -C"LOGFILE none" \
          -C"LOGFILE $LOGDATARESOLVED" \
          -C"CACHEOUTFILE $STATSDIR/analog/cache.data" \
          -C"OUTFILE $STATSDIR/analog/index.html"
      fi

      # Report Magic works on analog's computer-readable output, so it needs
      # both binaries.
      if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
        log_debug "$WEBSITE: Create/update Report Magic config"
        printf '%s\n' \
          "# NB! This file is automatically generated. Do not edit directly!" \
          "# Instead, put additions/overrides in $RMAGIC_CFG.local" \
          "[statistics]" \
          "File_In = $STATSDIR/rmagic/report.dat" \
          "Frame_File_Out = $STATSDIR/rmagic/index.html" \
          "Language = en" \
          "[reports]" \
          "File_Out = $STATSDIR/rmagic/" \
          "[QUICK]" \
          "Rows = ALL" \
          "[navigation]" \
          "File_Out = navfile.html" \
          "" \
          > "$RMAGIC_CFG"
        # Plain assignment: a leading "$" here would try to execute the
        # expanded option string as a command and abort the run.
        if [ -s "$RMAGIC_CFG.local" ]; then
          RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
        fi
        mkdir -p "$STATSDIR/rmagic"
        log_debug "$WEBSITE: Create initial Report Magic stats"
        "$ANALOG_BIN" $ANALOG_OPTIONS \
          -C"LOGFILE none" \
          -C"CACHEFILE $STATSDIR/analog/cache.data" \
          -C"LANGUAGE ENGLISH" \
          -C"OUTPUT COMPUTER" \
          -C"OUTFILE $STATSDIR/rmagic/report.dat"
        "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
      fi

      if [ -x "$WEBALIZER_BIN" ]; then
        log_debug "$WEBSITE: Create/update Webalizer config"
        printf '%s\n' \
          "# NB! This file is automatically generated. Do not edit directly!" \
          "# Instead, put additions/overrides in $WEBALIZER_CFG.local" \
          "HostName $HOST.$DOMAIN" \
          "HideSite *$DOMAIN" \
          "HideReferrer $DOMAIN/" \
          "" \
          > "$WEBALIZER_CFG"
        if [ -s "$WEBALIZER_CFG.local" ]; then
          cat "$WEBALIZER_CFG.local" >> "$WEBALIZER_CFG"
        fi
        mkdir -p "$STATSDIR/webalizer"
        log_debug "$WEBSITE: Create initial Webalizer stats"
        # A failing initial run is tolerated on purpose.
        "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 - < "$LOGDATARESOLVED" || true
      fi

      if [ -x "$MODLOGAN_BIN" ]; then
        log_debug "$WEBSITE: Create/update ModLogAn config"
        printf '%s\n' \
          "# NB! This file is automatically generated. Do not edit directly!" \
          "# Instead, put additions/overrides in $MODLOGAN_CFG.local" \
          "[global]" \
          "includepath = /etc/modlogan" \
          "include = modlogan.def.conf,global" \
          "loadplugin = input_clf" \
          "loadplugin = output_modlogan" \
          "loadplugin = processor_web" \
          "outputdir=$STATSDIR/modlogan" \
          "incremental = 0" \
          "debug_level = 0" \
          "[processor_web]" \
          "include = modlogan.def.conf,processor_web" \
          "searchengines = match.searchengines.conf" \
          "debug_searchengines = 0" \
          "hidereferrer = \"^http://$HOST.$DOMAIN\"" \
          "[output_modlogan]" \
          "include = modlogan.def.conf,output_modlogan" \
          "hostname = http://$HOST.$DOMAIN" \
          "[input_clf]" \
          "include = match.os.conf,matchos" \
          "include = match.ua.conf,matchua" \
          "inputfile = -" \
          "" \
          > "$MODLOGAN_CFG"
        if [ -s "$MODLOGAN_CFG.local" ]; then
          cat "$MODLOGAN_CFG.local" >> "$MODLOGAN_CFG"
        fi
        mkdir -p "$STATSDIR/modlogan"
        if [ -n "$ROOT" ]; then
          chown "$user:$group" "$STATSDIR/modlogan"
        fi
        log_debug "$WEBSITE: Create initial ModLogAn stats"
        # A failing initial run is tolerated on purpose.
        if [ -n "$ROOT" ]; then
          su "$user" -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" < "$LOGDATARESOLVED" || true
        else
          "$MODLOGAN_BIN" $MODLOGAN_OPTIONS < "$LOGDATARESOLVED" || true
        fi
      fi

      if [ -x "$AWSTATS_BIN" ]; then
        log_debug "$WEBSITE: Create/update AWStats config"
        printf '%s\n' \
          "# NB! This file is automatically generated. Do not edit directly!" \
          "# Instead, put additions/overrides in $AWSTATS_CFG.local" \
          "LogFile=\"$LOGDATATMP\"" \
          "LogFormat=4" \
          "DNSLookup=0" \
          "DirData=\"$STATSDIR/awstats\"" \
          "AllowToUpdateStatsFromBrowser=0" \
          "DirCgi=\"http://cgi.jones.dk/cgi-bin\"" \
          "DirIcons=\"http://stats.jones.dk/awstats-icon\"" \
          "SiteDomain=\"$FQDN_ESC\"" \
          "HostAliases=\"$FQDN_ESC\"" \
          "Lang=\"dk\"" \
          "DirLang=\"/usr/share/awstats/lang\"" \
          "DefaultFile=\"index.html\"" \
          "SkipHosts=\"\"" \
          "SkipFiles=\"\"" \
          "ShowLinksOnUrl=1" \
          "ShowFlagLinks=0" \
          "" \
          > "$AWSTATS_CFG"
        # Honour the .local file announced in the generated header, in the
        # same way as for the other tools.
        if [ -s "$AWSTATS_CFG.local" ]; then
          cat "$AWSTATS_CFG.local" >> "$AWSTATS_CFG"
        fi
        mkdir -p "$STATSDIR/awstats"
        log_debug "$WEBSITE: Create initial AWStats stats"
        # Point the config at the data to process for this run, then put the
        # incoming-log path back.
        awstats_setlog "$AWSTATS_CFG" "$LOGDATARESOLVED"
        "$AWSTATS_BIN" -config="$WEBSITE" $AWSTATS_OPTIONS -output > "$STATSDIR/awstats/index.html"
        awstats_setlog "$AWSTATS_CFG" "$LOGDATATMP"
      fi

      log_debug "$WEBSITE: Compress DNS-resolved logdata"
      gzip -9 "$LOGDATARESOLVED"
    fi

    # Merge the new log data, DNS-resolving it with whichever resolver is
    # installed.
    if [ -x "$JDRESOLVE_BIN" ]; then
      log_debug "$WEBSITE: DNS-resolve new logdata using jdresolve"
      # DB access doesn't work currently (no output...)
      # logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
      # jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
      logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" \
        | "$JDRESOLVE_BIN" $SITE_JDRESOLVE_OPTIONS - >> "$LOGDATA"
    elif [ -x "$LOGRESOLVE_BIN" ]; then
      log_debug "$WEBSITE: DNS-resolve new logdata using logresolve"
      logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" | "$LOGRESOLVE_BIN" >> "$LOGDATA"
    else
      log_debug "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
      logcontent "$LOGROOT" "$WEBROOT" "$WEBSITE" >> "$LOGDATA"
    fi

    # Only run the analysers when there actually is new data.
    if [ -s "$LOGDATA" ]; then
      if [ -x "$ANALOG_BIN" ]; then
        log_debug "$WEBSITE: Update analog stats"
        "$ANALOG_BIN" $ANALOG_OPTIONS \
          -C"LOGFILE none" \
          -C"CACHEFILE $STATSDIR/analog/cache.data" \
          -C"LOGFILE $LOGDATA" \
          -C"OUTFILE $STATSDIR/analog/index.html"
      fi
      if [ -x "$RMAGIC_BIN" ] && [ -x "$ANALOG_BIN" ]; then
        log_debug "$WEBSITE: Update Report Magic stats"
        "$ANALOG_BIN" $ANALOG_OPTIONS \
          -C"LOGFILE none" \
          -C"CACHEFILE $STATSDIR/analog/cache.data" \
          -C"LOGFILE $LOGDATA" \
          -C"LANGUAGE ENGLISH" \
          -C"OUTPUT COMPUTER" \
          -C"OUTFILE $STATSDIR/rmagic/report.dat"
        "$RMAGIC_BIN" $RMAGIC_OPTIONS "$RMAGIC_CFG"
      fi
      if [ -x "$WEBALIZER_BIN" ]; then
        log_debug "$WEBSITE: Update Webalizer stats"
        "$WEBALIZER_BIN" $WEBALIZER_OPTIONS -N 0 - < "$LOGDATA"
      fi
      if [ -x "$MODLOGAN_BIN" ]; then
        log_debug "$WEBSITE: Update ModLogAn stats"
        if [ -n "$ROOT" ]; then
          su "$user" -c "$MODLOGAN_BIN $MODLOGAN_OPTIONS" < "$LOGDATA"
        else
          "$MODLOGAN_BIN" $MODLOGAN_OPTIONS < "$LOGDATA"
        fi
      fi
      if [ -x "$AWSTATS_BIN" ]; then
        log_debug "$WEBSITE: Update AWStats stats"
        awstats_setlog "$AWSTATS_CFG" "$LOGDATA"
        "$AWSTATS_BIN" -config="$WEBSITE" $AWSTATS_OPTIONS -output > "$STATSDIR/awstats/index.html"
        awstats_setlog "$AWSTATS_CFG" "$LOGDATATMP"
      fi
      gzip -f9 "$LOGDATA"
    fi

    if [ "$stamp" = "init" ]; then
      log_debug "Execute $WEBSITE POST_INIT"
      post_init "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_INIT."
    fi
    if [ "$stamp" = "update" ]; then
      log_debug "Execute $WEBSITE POST_UPDATE"
      post_update "$LOGROOT" "$WEBROOT" "$WEBSITE" || exit1 "Error executing POST_UPDATE."
    fi
  done