summaryrefslogtreecommitdiff
path: root/localwebsearch
blob: 336d0335e9160204c7d48ffb4067e0b9f925e1c7 (plain)
  1. #!/bin/bash
  2. # /etc/cron.daily/localwebsearch: Websearch maintenance script
  3. # Written by Jonas Smedegaard <dr@jones.dk>
  4. # halt on errors (NB! this is a bashism...)
  5. set -e
  6. function usage() {
  7. echo "Usage: $(basename $0) init|update|prelogrotate|postlogrotate|ignore <website> [<website>...]"
  8. echo " If no website is given, all are attempted"
  9. echo " Tip: Automagically runs when symlinked to /etc/cron.{daily,weekly,monthly}/"
  10. exit 1
  11. }
  12. function exit1() {
  13. echo "Error: $1"
  14. echo "Exiting..."
  15. exit 1
  16. }
  17. # automagically configure when run from cron dirs
  18. case $(dirname $0) in
  19. /etc/cron.daily)
  20. stamp=update
  21. ;;
  22. /etc/cron.weekly)
  23. stamp=ignore
  24. ;;
  25. /etc/cron.monthly)
  26. stamp=ignore
  27. ;;
  28. *)
  29. stamp=$1
  30. shift || usage
  31. ;;
  32. esac
  33. case "$stamp" in
  34. init)
  35. if [ -f /usr/bin/rundig ]; then
  36. touch /etc/htdig/local_full_refresh
  37. fi
  38. exit 0
  39. ;;
  40. update|prelogrotate|postlogrotate|ignore)
  41. ;;
  42. *)
  43. usage
  44. ;;
  45. esac
  46. if [ "$stamp" = "ignore" ]; then
  47. if [ $DEBUG ]; then
  48. echo "Asked to ignore - exiting silently..."
  49. fi
  50. exit 0
  51. fi
  52. if ! lockfile-create /var/run/localwebsearch.cron; then
  53. # Another htdig indexing cronjob is already running
  54. exit 0
  55. fi
  56. lockfile-touch /var/run/localwebsearch.cron &
  57. # Save the PID of the lockfile-touch process
  58. BADGER="$!"
  59. if [ -f /usr/bin/rundig ]; then
  60. for cfg in `find /etc/htdig -type f -name *.conf | grep -v "htdig.conf"`; do
  61. if [ -f /etc/htdig/local_full_refresh ]; then
  62. /usr/local/bin/localrundig -i -a
  63. rm /etc/htdig/local_full_refresh
  64. else
  65. /usr/local/bin/localrundig -a
  66. fi
  67. done
  68. fi
  69. kill "${BADGER}"
  70. lockfile-remove /var/run/localwebsearch.cron
  71. exit $?
  72. WEBROOT=/var/www
  73. LOGROOT=/var/log/apache
  74. # Options: $1=LOGROOT, $2=WEBROOT, $3=WEBSITE
  75. function statsdir() { echo $2/VIRTUAL/stats.$(dnsdomainname)/www/$3; }
  76. function webdirs() { find $1 -type d -mindepth 1 -maxdepth 1 | grep '\.*\.' | sed 's!$1!!'; }
  77. #function logfiles() { $(ls -r $LOGDIR/*-access*.gz) $(ls -r $LOGDIR/access*.??.gz) $(ls -r $LOGDIR/access*.?.gz) $(ls -r $LOGDIR/access*.?); }
  78. function logcontentresolved() { for file in $(find $1/$3 -name '????.??.00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
  79. function logcontent() { for file in $(find $1/$3 -name '????.??.??.gz' ! -name '*00.gz' -type f -mindepth 1 -maxdepth 1 -follow); do zcat $file; done; for file in $(find $1/$3 -name '????.??.??' ! -name '*00' -type f -mindepth 1 -maxdepth 1 -follow); do cat $file; done; }
  80. function host() { cat $2/VIRTUAL/$3/hostname || exit1 "Unable to get hostname for virtual host."; }
  81. function domain() { cat $2/VIRTUAL/$3/domainname || exit1 "Unable to get domainname for virtual host."; }
  82. function analog_cfg() { echo /etc/analog_$3.conf; }
  83. function rmagic_cfg() { echo /etc/rmagic/rmagic_$3.conf; }
  84. function webalizer_cfg() { echo /etc/webalizer_$3.conf; }
  85. function pre_init() { true; }
  86. function post_init() { true; }
  87. function pre_update() { true; }
  88. function post_update() { true; }
  89. # The above can be overridden in /etc/local/www
  90. . /etc/local/www || exit1 "Unable to read prefs file"
  91. # variables and functions too boring to be configurable
  92. JDRESOLVE_BIN="/usr/bin/jdresolve"
  93. JDRESOLVE_DB="/var/cache/jdresolve/hosts.db"
  94. #JDRESOLVE_OPTIONS="-r -t 5 --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  95. # DB access doesn't work currently (no output...)
  96. #JDRESOLVE_OPTIONS="-r --database=$JDRESOLVE_DB --dbfirst --expiredb=48"
  97. JDRESOLVE_OPTIONS="-r"
  98. JDRESOLVE_MERGE_OPTIONS="--mergedb --database=$JDRESOLVE_DB"
  99. LOGRESOLVE_BIN="/usr/sbin/logresolve"
  100. ANALOG_BIN="/usr/bin/analog"
  101. RMAGIC_BIN="/usr/bin/rmagic"
  102. WEBALIZER_BIN="/usr/bin/webalizer"
  103. AWSTATS_BIN="/usr/lib/cgi-bin/awstats.pl"
  104. function awstats_setlog() { sed -e "s!^\(LogFile=\).*\$!\\1$2!" $1 > $1.tmp; mv $1.tmp $1; }
  105. # Webiste/independent checks
  106. test -d $WEBROOT || exit1 "Webroot \"$WEBROOT\" doesn't exist"
  107. test -d $LOGROOT || exit1 "Logroot \"$LOGROOT\" doesn't exist"
  108. if [ -x $JDRESOLVE_BIN ]; then
  109. touch $JDRESOLVE_DB || exit1 "Couldn't touch JDRESOLVE_DB"
  110. fi
  111. # Generate stats for websites from stdin or all default sites
  112. WEBSITES=$@
  113. if [ "$WEBSITES" = "" ]; then
  114. WEBSITES=$(webdirs $LOGROOT $WEBROOT $WEBSITE)
  115. fi
  116. for WEBSITE in $WEBSITES; do
  117. STATSDIR=$(statsdir $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve STATSDIR."
  118. # FIXME test -d $STATSDIR/.. || exit1 "Directory above STATSDIR doesn't exist."
  119. ANALOG_CFG=$(analog_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve ANALOG_CFG."
  120. RMAGIC_CFG=$(rmagic_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve RMAGIC_CFG."
  121. WEBALIZER_CFG=$(webalizer_cfg $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to resolve WEBALIZER_CFG."
  122. if [ $stamp = "init" ]; then
  123. pre_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_INIT."
  124. fi
  125. if [ $stamp = "update" ]; then
  126. pre_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing PRE_UPDATE."
  127. fi
  128. HOST=$(host $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get hostname for virtual host."
  129. DOMAIN=$(domain $LOGROOT $WEBROOT $WEBSITE) || exit1 "Unable to get domainname for virtual host."
  130. ANALOG_OPTIONS="-G +g/etc/analog.conf +g$ANALOG_CFG +A -a"
  131. RMAGIC_OPTIONS=""
  132. WEBALIZER_OPTIONS="-c /etc/webalizer.conf -c $WEBALIZER_CFG -o $STATSDIR/webalizer -f"
  133. AWSTATS_OPTIONS="-update"
  134. FQDN_ESC=`echo "$HOST.$DOMAIN" | sed -e 's/\./\\\./g'` # needed for awstats config
  135. if [ $DEBUG ]; then
  136. echo "Making stats for $WEBSITE in $STATSDIR:"
  137. JDRESOLVE_OPTIONS="$JDRESOLVE_OPTIONS -p"
  138. ANALOG_OPTIONS="$ANALOG_OPTIONS +q"
  139. RMAGIC_OPTIONS="$RMAGIC_OPTIONS"
  140. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -T"
  141. AWSTATS_OPTIONS="$AWSTATS_OPTIONS -showsteps"
  142. else
  143. JDRESOLVE_OPTIONS="$JDRESOLVE_DB -n"
  144. ANALOG_OPTIONS="$ANALOG_OPTIONS -q"
  145. RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Verbose=NONE"
  146. WEBALIZER_OPTIONS="$WEBALIZER_OPTIONS -Q"
  147. AWSTATS_OPTIONS="$AWSTATS_OPTIONS"
  148. fi
  149. if [ $stamp = "init" -o $stamp = "init" ]; then
  150. [ $DEBUG ] && echo "$WEBSITE: Purge STATSDIR"
  151. rm -rf $STATSDIR
  152. mkdir $STATSDIR
  153. fi
  154. LOGDATARESOLVED="$STATSDIR/rawlog_old.txt"
  155. LOGDATA="$STATSDIR/rawlog_new.txt"
  156. LOGDATATMP="$STATSDIR/rawlog_incoming.txt"
  157. touch $LOGDATARESOLVED $LOGDATA $LOGDATATMP || exit1 "Couldn't touch LOGDATA files."
  158. if [ $stamp = "init" ]; then
  159. logcontentresolved $LOGROOT $WEBROOT $WEBSITE >> $LOGDATARESOLVED
  160. if [ -x $ANALOG_BIN ]; then
  161. [ $DEBUG ] && echo "$WEBSITE: Create/update analog config"
  162. echo "\
  163. # NB! This file is automatically generated. Do not edit directly!
  164. # Instead, put additions/overrides in $ANALOG_CFG.local
  165. HOSTNAME $HOST.$DOMAIN
  166. HOSTURL http://$HOST.$DOMAIN/
  167. BASEURL http://$HOST.$DOMAIN
  168. LANGUAGE DANISH
  169. "\
  170. > $ANALOG_CFG
  171. [ -s $ANALOG_CFG.local ] && cat $ANALOG_CFG.local >> $ANALOG_CFG
  172. mkdir $STATSDIR/analog
  173. [ $DEBUG ] && echo "$WEBSITE: Create initial analog stats"
  174. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"LOGFILE $LOGDATARESOLVED" -C"CACHEOUTFILE $STATSDIR/analog/cache.data" -C"OUTFILE $STATSDIR/analog/index.html"
  175. fi
  176. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  177. [ $DEBUG ] && echo "$WEBSITE: Create/update Report Magic config"
  178. echo "\
  179. # NB! This file is automatically generated. Do not edit directly!
  180. # Instead, put additions/overrides in $RMAGIC_CFG.local
  181. [statistics]
  182. File_In = $STATSDIR/rmagic/report.dat
  183. Frame_File_Out = $STATSDIR/rmagic/index.html
  184. Language = en
  185. [reports]
  186. File_Out = $STATSDIR/rmagic/
  187. [QUICK]
  188. Rows = ALL
  189. [navigation]
  190. File_Out = navfile.html
  191. "\
  192. > $RMAGIC_CFG
  193. [ -s $RMAGIC_CFG.local ] && $RMAGIC_OPTIONS="$RMAGIC_OPTIONS -statistics_Include=$RMAGIC_CFG.local"
  194. mkdir $STATSDIR/rmagic
  195. [ $DEBUG ] && echo "$WEBSITE: Create initial Report Magic stats"
  196. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  197. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  198. fi
  199. if [ -x $WEBALIZER_BIN ]; then
  200. [ $DEBUG ] && echo "$WEBSITE: Create/update Webalizer config"
  201. echo "\
  202. # NB! This file is automatically generated. Do not edit directly!
  203. # Instead, put additions/overrides in $WEBALIZER_CFG.local
  204. HostName $HOST.$DOMAIN
  205. HideSite *$DOMAIN
  206. HideReferrer $DOMAIN/
  207. "\
  208. > $WEBALIZER_CFG
  209. [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
  210. mkdir $STATSDIR/webalizer
  211. [ $DEBUG ] && echo "$WEBSITE: Create initial Webalizer stats"
  212. cat $LOGDATARESOLVED | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 - || true
  213. fi
  214. if [ -x $AWSTATS_BIN ]; then
  215. [ $DEBUG ] && echo "$WEBSITE: Create/update AWStats config"
  216. echo "\
  217. # NB! This file is automatically generated. Do not edit directly!
  218. # Instead, put additions/overrides in $AWSTATS_CFG.local
  219. LogFile=\"$LOGDATATMP\"
  220. LogFormat=4
  221. DNSLookup=0
  222. DirData=\"$STATSDIR/awstats\"
  223. AllowToUpdateStatsFromBrowser=0
  224. DirCgi=\"http://cgi.jones.dk/cgi-bin\"
  225. DirIcons=\"http://stats.jones.dk/awstats-icon\"
  226. SiteDomain=\"$FQDN_ESC\"
  227. HostAliases=\"$FQDN_ESC\"
  228. Lang=\"dk\"
  229. DirLang=\"/usr/share/awstats/lang\"
  230. DefaultFile=\"index.html\"
  231. SkipHosts=\"\"
  232. SkipFiles=\"\"
  233. ShowLinksOnUrl=1
  234. ShowFlagLinks=0
  235. "\
  236. >/etc/awstats/awstats.$WEBSITE.conf
  237. [ -s $WEBALIZER_CFG.local ] && cat $WEBALIZER_CFG.local >> $WEBALIZER_CFG
  238. mkdir $STATSDIR/awstats
  239. [ $DEBUG ] && echo "$WEBSITE: Create initial AWStats stats"
  240. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATARESOLVED
  241. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  242. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  243. fi
  244. [ $DEBUG ] && echo "$WEBSITE: Compress DNS-resolved logdata"
  245. gzip -9 $LOGDATARESOLVED
  246. fi
  247. if [ -x $JDRESOLVE_BIN ]; then
  248. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using jdresolve"
  249. # DB access doesn't work currently (no output...)
  250. # logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  251. # jdresolve $JDRESOLVE_MERGE_OPTIONS $LOGDATA
  252. logcontent $LOGROOT $WEBROOT $WEBSITE | $JDRESOLVE_BIN $JDRESOLVE_OPTIONS - >> $LOGDATA
  253. elif [ -x $LOGRESOLVE_BIN ]; then
  254. [ $DEBUG ] && echo "$WEBSITE: DNS-resolve new logdata using logresolve"
  255. logcontent $LOGROOT $WEBROOT $WEBSITE | $LOGRESOLVE_BIN >> $LOGDATA
  256. else
  257. [ $DEBUG ] && echo "$WEBSITE: Merging new logdata without DNS-resolving (no resolver found)"
  258. logcontent $LOGROOT $WEBROOT $WEBSITE >> $LOGDATA
  259. fi
  260. if [ -s $LOGDATA ]; then
  261. if [ -x $ANALOG_BIN ]; then
  262. [ $DEBUG ] && echo "$WEBSITE: Update analog stats"
  263. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"OUTFILE $STATSDIR/analog/index.html"
  264. fi
  265. if [ -x $RMAGIC_BIN -a -x $ANALOG_BIN ]; then
  266. [ $DEBUG ] && echo "$WEBSITE: Update Report Magic stats"
  267. $ANALOG_BIN $ANALOG_OPTIONS -C"LOGFILE none" -C"CACHEFILE $STATSDIR/analog/cache.data" -C"LOGFILE $LOGDATA" -C"LANGUAGE ENGLISH" -C"OUTPUT COMPUTER" -C"OUTFILE $STATSDIR/rmagic/report.dat"
  268. $RMAGIC_BIN $RMAGIC_OPTIONS $RMAGIC_CFG
  269. fi
  270. if [ -x $WEBALIZER_BIN ]; then
  271. [ $DEBUG ] && echo "$WEBSITE: Update Webalog stats"
  272. zcat -f $LOGDATA | $WEBALIZER_BIN $WEBALIZER_OPTIONS -N 0 -
  273. fi
  274. if [ -x $AWSTATS_BIN ]; then
  275. [ $DEBUG ] && echo "$WEBSITE: Update AWStats stats"
  276. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATA
  277. $AWSTATS_BIN -config=$WEBSITE $AWSTATS_OPTIONS -output > $STATSDIR/awstats/index.html
  278. awstats_setlog /etc/awstats/awstats.$WEBSITE.conf $LOGDATATMP
  279. fi
  280. gzip -f9 $LOGDATA
  281. fi
  282. if [ $stamp = "init" ]; then
  283. post_init $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_INIT."
  284. fi
  285. if [ $stamp = "update" ]; then
  286. post_update $LOGROOT $WEBROOT $WEBSITE || exit1 "Error executing POST_UPDATE."
  287. fi
  288. done