#!/usr/bin/perl -w
#
#   parselog.pl v1.0.1
#
#   Copyright (C)2000 Mark A. Bentley <bentlema@cs.umn.edu>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
###############################################################################
#
#   This file was last modified:  July 26, 2000
#
#   You can download the latest distribution from my web site:
#
#         http://www.cs.umn.edu/~bentlema/projects
#
#   For comments, suggestions, bug reports, or whatever, you may contact me
#   via email:
#
#         Mark A. Bentley <bentlema@cs.umn.edu>
#
###############################################################################
#
#
#                              DESCRIPTION
#
# This script is designed to accept log output from the Apache web server via 
# the CustomLog directive.  It will create a directory for each virtual host's 
# logs, and will create a log file within this directory with the name based on
# the year, month and day.  When the day changes, yesterday's log files will be
# closed, and new ones opened for today.
#
#
#                                USAGE
#
# You will need to create a CustomLog entry in your httpd.conf in the global 
# section (not within a virtual host definition) like this:
#
#      CustomLog "|/usr/local/bin/parselog" "[ %v %{%Y %m %d}t ] %h %l %u %t \"%r\" %s %b"
#
# The log formats defined within the brackets are absolutely necessary.
# The %v will expand to the virtualhost name, and the %t (with its internal
# format) will expand to the year, month, and day separated by spaces.  If you
# change any of the values within these brackets, you will have to modify the
# perl regular expression within the code below.  The code will strip off these
# brackets and everything within and write the remaining common log to the
# appropriate directory and filename.
#
# This script will be fork/exec'ed with root perms if you've started apache as 
# root.  To be safe you should specify the absolute path to parselog in your 
# CustomLog directive.
#
# You will also have to set the LOG_DIR in the configuration section below.
#
# This script will run with root privileges, so be careful if you decide to 
# make changes or add functionality.
#

#
# Configuration
#

# Top-level directory that receives one subdirectory per virtual host.
# It must already exist; the script exits at startup if it does not.
$LOG_DIR   = "/var/log/apache-vhosts";

# Name of the filehandle the log lines are read from.
$INPUT_SRC = "STDIN";  # you shouldn't need to change this.

# Ownership and permissions applied to each log file and to each
# per-virtual-host directory after it is created.
$LOG_FILE_PERMS = 0640;
$LOG_FILE_OWNER = "root";
$LOG_FILE_GROUP = "adm";
$LOG_DIR_PERMS  = 0755;
$LOG_DIR_OWNER  = "root";
$LOG_DIR_GROUP  = "root";

#
# Translate a user name into the numeric uid that chown() expects.
# Takes a user name; returns its uid.  An unknown name is reported once
# on STDERR and -1 is returned, which chown() interprets on most systems
# as "leave the owner unchanged".  Without this fallback the uid would be
# undef, which chown() silently treats as 0.
#
sub lookup_uid {
   my $name = shift;
   my $uid  = ( getpwnam( $name ) )[2];
   return $uid if defined $uid;
   warn "$0: unknown user \"$name\"; owner will be left unchanged\n";
   return -1;
}

#
# Translate a group name into the numeric gid that chown() expects.
# Takes a group name; returns its gid, or -1 (with a warning on STDERR)
# when the group does not exist, so that chown() leaves the group alone.
#
sub lookup_gid {
   my $name = shift;
   my $gid  = ( getgrnam( $name ) )[2];
   return $gid if defined $gid;
   warn "$0: unknown group \"$name\"; group will be left unchanged\n";
   return -1;
}

$LOG_FILE_UID = lookup_uid( $LOG_FILE_OWNER );
$LOG_FILE_GID = lookup_gid( $LOG_FILE_GROUP );
$LOG_DIR_UID  = lookup_uid( $LOG_DIR_OWNER  );
$LOG_DIR_GID  = lookup_gid( $LOG_DIR_GROUP  );

#
# Initialization
#

my %HOSTNAME   = ();  # hostnames seen since the last day change
my @OPEN_FH    = ();  # names identifying the log files currently open

#
# Support routines
#

sub flush {  # Takes a filehandle
    # Make the given handle the default output handle, remembering the
    # one that was selected before so it can be put back afterwards.
    my $previous = select(shift);

    # Switching $| on forces out whatever is buffered for the selected
    # handle; it is switched off again so normal buffering resumes.
    $| = 1;
    print "";
    $| = 0;

    select($previous);
}

sub makenote {  # Takes a string;
   # Writes one line to STDERR of the form
   #    [local time] program name: message
   my $message = shift;
   my $stamp   = localtime( time() );   # scalar context: human-readable date
   printf STDERR "[%s] %s: %s\n", $stamp, $0, $message;
}

#
# Main loop
#

makenote("Starting up...  PID: $$");
makenote("INPUT_SRC = $INPUT_SRC");

# The top-level log directory is never created by this script; only the
# per-virtual-host subdirectories are.  Refuse to run without it.
if ( -d $LOG_DIR ) {
   makenote("LOG_DIR = $LOG_DIR");
} else {
   makenote("Fatal Error:  $LOG_DIR does not exist!");
   exit( 1 );
}

# Permissions are reported in octal so they read the same way they are
# written in the configuration section (0755 rather than 493).
makenote("Log Directory: " .
         "$LOG_DIR_OWNER($LOG_DIR_UID) " .
         "$LOG_DIR_GROUP($LOG_DIR_GID) " .
         sprintf( "%04o", $LOG_DIR_PERMS ));

makenote("Log Files:     " .
         "$LOG_FILE_OWNER($LOG_FILE_UID) " .
         "$LOG_FILE_GROUP($LOG_FILE_GID) " .
         sprintf( "%04o", $LOG_FILE_PERMS ));

#
# Close every log file that is currently open and forget which virtual
# hosts have been seen.  %HOSTNAME maps a hostname to its open filehandle;
# @OPEN_FH lists the hostnames that have one.  Takes no arguments.
#
sub close_open_logs {
   makenote("OPEN_FH: $#OPEN_FH");
   foreach my $host ( @OPEN_FH ) {
      makenote("Closing $host filehandle.");
      close( $HOSTNAME{$host} )
         or makenote("Error: closing log for $host failed: $!");
   }
   %HOSTNAME = ();
   makenote("HOSTNAME hash reset.");
   @OPEN_FH = ();
   makenote("OPEN_FH array reset.");
}

# Date (year.month.day) of the most recently processed line.  The whole
# date is compared, not just the day of the month, so a change of month or
# year with the same day number is also noticed.
my $previous_date = "";

while ( defined( my $line = <$INPUT_SRC> ) ) {

   #
   # Parse the log input here.  The leading "[ vhost year month day ]"
   # is stripped off; the rest is the common-format log entry.
   #

   my ( $log_hostname, $log_year, $log_month, $log_day, $log_common ) =
      $line =~ m/^\[ (\S+) (\d+) (\d+) (\d+) \] (.*)/;

   # A line that does not match must be dropped; otherwise the fields of
   # the previous line would be reused and written a second time.
   if ( ! defined $log_hostname ) {
      chomp( $line );
      makenote("Skipping unparseable line: $line");
      next;
   }

   # The hostname becomes a directory name below LOG_DIR.  Refuse names
   # that would lead outside of LOG_DIR, since this may run as root.
   if ( $log_hostname =~ m{/} or $log_hostname =~ m/^\.\.?$/ ) {
      makenote("Skipping line with unsafe hostname: $log_hostname");
      next;
   }

   my $log_date = "$log_year.$log_month.$log_day";

   #
   # Did the day change?
   #

   if ( $log_date ne $previous_date ) {
      makenote("Day change.");
      close_open_logs();
      $previous_date = $log_date;
   }

   #
   # Have we seen this virtual host today?
   #

   if ( ! $HOSTNAME{$log_hostname} ) {

      my $log_pathname = "$LOG_DIR/$log_hostname";

      #
      # create hostname directory within LOG_DIR
      #

      if ( ! -d $log_pathname ) {
         makenote("mkdir: $log_pathname");
         if ( ! mkdir( $log_pathname, $LOG_DIR_PERMS ) ) {
            makenote("Error: mkdir $log_pathname failed: $!");
            next;
         }
         chown( $LOG_DIR_UID, $LOG_DIR_GID, $log_pathname )
            or makenote("Error: chown $log_pathname failed: $!");
         # mkdir's mode is reduced by the umask, so set it explicitly.
         chmod( $LOG_DIR_PERMS, $log_pathname )
            or makenote("Error: chmod $log_pathname failed: $!");
      }

      #
      # open logfile named year.month.day for appending
      #

      my $current_log = "$log_pathname/$log_date";

      makenote("open: $current_log");
      my $fh;
      if ( ! open( $fh, ">>", $current_log ) ) {
         # Do not mark the host as seen, so the next line retries.
         makenote("Error: cannot open $current_log: $!");
         next;
      }
      chown( $LOG_FILE_UID, $LOG_FILE_GID, $current_log )
         or makenote("Error: chown $current_log failed: $!");
      chmod( $LOG_FILE_PERMS, $current_log )
         or makenote("Error: chmod $current_log failed: $!");

      #
      # mark host as seen, remember its filehandle, and maintain the
      # list of hosts with an open log file
      #

      $HOSTNAME{$log_hostname} = $fh;
      push @OPEN_FH, $log_hostname;

   }

   #
   # Write the log line to the appropriate log file and push it out
   # right away so the file is always current.
   #

   my $out = $HOSTNAME{$log_hostname};
   print {$out} $log_common, "\n"
      or makenote("Error: writing log for $log_hostname failed: $!");
   flush( $out );
}

# End of input: close whatever is still open before exiting.
makenote("End of input.");
close_open_logs();