#!/usr/local/bin/perl
################################################################################
#
# squid2common.pl : convert squid log files (native or httpd emulation) into
#                   cern-style cache hit & cache miss logs (in common format)
#
# Usage: squid2common.pl <logfile> [<logfile> ...]
#
# Martin Gleeson, August 1996
#
# (c) Copyright, The University of Melbourne, 1996
#
# Version History
# ~~~~~~~~~~~~~~~
# Version 1.3.2     28 April 1997
#     * Fixed: bug with printing IP number when hostname lookup failed.
#         Thanks to Larry Hoehn
#       Current for squid 1.1.9
#
# Version 1.3.1     12 March 1997
#     * Fixed: wasn't counting SIBLING_HITs as cache hits when in native format
#         Thanks to Andrew Moar
#       Current for squid 1.1.8
#
# Version 1.3       29 January 1997
#     * Now handles multiple log files on the command line
#     * Added support for processing gzipped logs
#     * Bug fix: didn't process lines from old emulation format logs correctly
#         Thanks to Marc Delisle
#
# Version 1.2.1     17 January 1997
#     * Bug fix: when log had FQDNs instead of IP numbers, erroneous host
#       information was propagated throughout the output logs
#         Thanks to Adrian Bassett
#
# Version 1.2       15 January 1997
#     * Added support for more of squid's response types
#       (TCP_IFMODIFIEDSINCE changed to TCP_IMS_HIT and TCP_IMS_MISS,
#       TCP_REFRESH_HIT, TCP_REF_FAIL_HIT, SIBLING_HIT, PARENT_HIT, etc)
#       Current for squid 1.1.4.
#
# Version 1.1       27 August 1996
#     * Bug fixes: not turning some IP's into hostnames
#                  missing some squid-specific response types
#         Thanks to Mark Treacy
#
# Version 1.0       8 July 1996
#     * Initial version.
#
################################################################################

use strict;
use warnings;
use Socket qw(inet_aton AF_INET);

# Optional settings.  They stay undefined (i.e. switched off) unless the
# corresponding assignment in the configuration section below is uncommented.
my ($emulation, $stoplist, $onefile, $debug);

#------------ Configuration options -------------------------------------------#

#------------ You must set the following variable -----------------------------#

my $gmtoffset = "+1000"; ###### Offset from GMT

#------------------------------------------------------------------------------#
# uncomment the following variable if your access log is in httpd emulation
# format i.e. your squid.conf has 'emulate_httpd_log' set to 'on'

# $emulation = "true";

#------------------------------------------------------------------------------#
# uncomment the following line if you want to ignore particular IP numbers
# in the log - e.g. ignore neighbours if you only want stats for local clients.
# It points to a file consisting of a single line with a list of one or more IP
# numbers, delimited by the 'pipe' symbol, e.g.:
#   123.45.67.89|234.56.78.90|234.5.6.78

# $stoplist = "/servers/http/squid/etc/neighbours";

#------------------------------------------------------------------------------#
# Names of the output files:

my $cache_hit_file  = "cache.convert";
my $cache_miss_file = "proxy.convert";

#------------------------------------------------------------------------------#
# uncomment the following line if you want all the output in one file, rather
# than cache hit & miss logs

# $onefile = "squid.convert";

#------------ No further configuration required -------------------------------#

# For debugging purposes
# $debug = 1;

################################################################################

# Address family handed to gethostbyaddr(); only IPv4 dotted quads are looked up.
my $address_type = AF_INET;

# Month names indexed by the 0-based month number returned by localtime().
my @months     = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @longmonths = qw(January February March April May June July
                    August September October November December);

#------------------------------------------------------------------------------#

my $usage = "usage: squid2common.pl <logfile> [<logfile> ...]\n";

die $usage unless @ARGV;

my @logfiles = @ARGV;

#------------------------------------------------------------------------------#
# State shared by all log files given on the command line.

my %hosts;          # host field as logged -> lower-cased name used in the output
my %miss_types;     # squid result code -> number of entries written as misses

# Running totals.  They are NOT reset between log files, so the figures printed
# after each file cover every file processed so far.
my ($totalproxied, $totalcached, $sizeproxied, $sizecached) = (0, 0, 0, 0);

# The stoplist does not depend on the log file, so it is read only once.
my $stop_re = $stoplist ? load_stoplist($stoplist) : undef;

# All progress and summary output goes to STDERR.
select(STDERR);

print '=' x 78; print "\n";
print "squid2common.pl started at ", time_now(), ".\n";
print '=' x 78; print "\n\n";

# Output files are opened in append mode.  In one-file mode hits and misses
# share a single handle.
my $miss_name = $onefile ? $onefile : $cache_miss_file;
open(my $proxy_fh, '>>', $miss_name)
    or die "Couldn't open for output - '$miss_name': $!";
my $cache_fh;
if ($onefile) {
    $cache_fh = $proxy_fh;
}
else {
    open($cache_fh, '>>', $cache_hit_file)
        or die "Couldn't open for output - '$cache_hit_file': $!";
}

foreach my $logfile (@logfiles) {

    # First pass: count the entries so the progress bar can be scaled.
    my $line_count = count_lines($logfile);

    # Number of log lines represented by one '#' of the 50-column bar.
    my $inc = int($line_count / 50);

    print " The logfile has $line_count entries.\n";
    print " Processing...\n";
    # Labels are padded so they sit above the start, middle and end of the bar.
    printf " %-23s%-23s%s\n", '0%', '50%', '100%';
    print " |-----------------------|------------------------|\n ";

    my $counter      = 0;   # lines seen since the last '#' was printed
    my $hash_counter = 0;   # number of progress steps taken so far
    my $linecount    = 0;   # lines read from this log file

    my $log_fh = open_log($logfile);

    #--------------------------------------------------------------------------#

    while (my $entry = <$log_fh>) {
        $linecount++;
        $counter++;
        if ($counter >= $inc) {
            $counter = 0;
            $hash_counter++;
            print '#' if ($hash_counter <= 50);
        }

        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp $entry;

        # split the input line into its various components
        my ($host, $www_date, $request, $type, $size, $hiercodes);

        if ($emulation) {
            # host rfc931 user [date] "request" type size
            ($host, $www_date, $request, $type, $size) =
                $entry =~ /^(\S+) \S+ \S+ \[([^\]]+)\] \"([^\"]*)\" (\S+) (\S+)$/
                or next;            # not an emulation-format line: skip it
            $hiercodes = '';        # emulation logs carry no hierarchy field
        }
        else {
            # time elapsed host codes size method url ident hiercodes type
            my ($time, $elapsed, $client, $codes, $bytes, $htype, $url,
                $ident, $hier, $content_type) = split(' ', $entry, 10);

            # Skip blank or truncated lines instead of emitting entries
            # assembled from undefined fields.
            next unless (defined $url) && ($time =~ /^\d+(?:\.\d+)?$/);

            $host      = $client;
            $size      = $bytes;
            $hiercodes = defined $hier ? $hier : '';

            # The log time is epoch seconds with a fractional part.
            my ($seconds) = split(/\./, $time);
            my ($sec, $min, $hour, $mday, $mon, $year) = localtime($seconds);
            $www_date = sprintf "%02d/%s/%d:%02d:%02d:%02d %s",
                $mday, $months[$mon], $year + 1900,
                $hour, $min, $sec, $gmtoffset;

            # $codes looks like "TCP_MISS/200"; only the result code is used.
            ($type) = split(/\//, $codes);
            $type = '' unless defined $type;

            $request = "$htype $url";
            $request =~ s/ICP_QUERY/GET/g;
        }

        next if ($stop_re && $host =~ $stop_re);

        # convert IP into hostname
        my $name = resolve_host($host);

        # Byte count used for the statistics; a non-numeric size counts as 0.
        my $nbytes = ($size =~ /^\d+$/) ? $size : 0;

        if ($type eq "TCP_DENIED") {
            # access denied by the cache: logged as a 401 in the proxy log
            print {$proxy_fh}
                "$name - - [$www_date] \"$request HTTP/1.0\" 401 $size\n";
            $sizeproxied  += $nbytes;
            $totalproxied += 1;
        }
        elsif ($type =~ /TCP_HIT|UDP_HIT_OBJ|SIBLING_HIT|TCP_IMS_HIT|TCP_REF_FAIL_HIT|TCP_REFRESH_HIT|PARENT_HIT/
               || $hiercodes =~ /SIBLING_HIT|PARENT_HIT/) {
            # served from this cache or from a neighbour's cache;
            # zero-length hits are not logged or counted
            if ($nbytes != 0) {
                print {$cache_fh}
                    "$name - - [$www_date] \"$request HTTP/1.0\" 200 $size\n";
                $sizecached  += $nbytes;
                $totalcached += 1;
            }
        }
        # There are lots more miss codes than these now, so we'll just say
        # that anything else is a miss.
        #
        #elsif( $type =~ /TCP_MISS/ || (($type =~ /TCP_IFMODSINCE/) && ($size < 220))
        #       || ($type =~ /TCP_EXPIRED/) || ($type =~ /TCP_REFRESH/)
        #       || ($type =~ /TCP_SWAPFAIL/))
        else {
            # fetched from external source
            print {$proxy_fh}
                "$name - - [$www_date] \"$request HTTP/1.0\" 200 $size\n";
            $sizeproxied  += $nbytes;
            $totalproxied += 1;
            $miss_types{$type}++;
        }
    }

    # Fill the bar up to exactly 50 columns.
    while ($hash_counter < 50) {
        $hash_counter++;
        print '#';
    }
    print "\n\n";

    # For a zcat pipe, close() also reports a failed decompression.
    close($log_fh)
        or warn "Problem closing logfile '$logfile': "
              . ($! ? $! : "exit status $?") . "\n";

    #--------------------------------------------------------------------------#

    printf "%s lines processed.\n\n", commas($linecount);
    printf "Total hits: Proxy %s requests, Cache %s requests.\n\n",
        commas($totalproxied), commas($totalcached);
    printf "Total sizes: Proxy %s bytes, Cache %s bytes\n\n",
        commas($sizeproxied), commas($sizecached);

    # Both denominators must be non-zero before a rate can be computed.
    if (($totalproxied != 0 || $totalcached != 0)
        && ($sizeproxied != 0 || $sizecached != 0)) {
        printf "Hit rates: %s%% requests, %s%% bytes\n\n",
            commas(($totalcached / ($totalproxied + $totalcached)) * 100),
            commas(($sizecached / ($sizeproxied + $sizecached)) * 100);
    }
}

# Buffered write errors only surface at close, so check it.
close($proxy_fh) or die "Couldn't finish writing '$miss_name': $!";
if (!$onefile) {
    close($cache_fh) or die "Couldn't finish writing '$cache_hit_file': $!";
}

print '=' x 78; print "\n";
print "squid2common.pl finished at ", time_now(), ".\n";
print '=' x 78; print "\n";

if ($debug) {
    my $total = 0;
    print "\nMiss Types:\n";
    foreach my $key (sort keys %miss_types) {
        printf "Type: %-30s Number:%10d\n", $key, $miss_types{$key};
        $total += $miss_types{$key};
    }
    printf "Total: %-29s %10d\n\n", '', $total;
    print '=' x 78;
    print "\n";
}

exit(0);

#------------------------------------------------------------------------------#
sub commas {
    # Round a number to the nearest integer and insert thousands separators,
    # e.g. 1234567.4 -> "1,234,567".  An undefined argument is treated as 0.
    my ($number) = @_;
    my $text = sprintf "%.0f", defined $number ? $number : 0;
    1 while $text =~ s/^(-?\d+)(\d{3})/$1,$2/;
    return $text;
}

#------------------------------------------------------------------------------#
sub find_prog {
    # find the path to a program if it's in your PATH, or return nothing.
    # Returns the first "$dir/$prog" that is executable and not a directory,
    # or '' when there is none.
    my ($prog) = @_;
    my $search_path = defined $ENV{'PATH'} ? $ENV{'PATH'} : '';
    foreach my $dir (split(/:/, $search_path)) {
        next if $dir eq '';     # don't probe "/$prog" for empty PATH entries
        my $candidate = "$dir/$prog";
        return $candidate if (-x $candidate && !-d $candidate);
    }
    return '';
}

#------------------------------------------------------------------------------#
sub time_now {
    # Current local time formatted as "HH:MM:SS on D Month YYYY".
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    return sprintf "%02d:%02d:%02d on %d %s %d",
        $hour, $min, $sec, $mday, $longmonths[$mon], $year + 1900;
}

#------------------------------------------------------------------------------#
sub open_log {
    # Open a log file for reading and return the filehandle.  Names ending in
    # ".gz" are read through zcat.  Dies if the file (or zcat) is unavailable.
    my ($logfile) = @_;
    my $fh;
    if ($logfile =~ /\.gz$/) {
        # Without this check an empty $zcat would make the pipe open try to
        # run the log file itself as a command.
        my $zcat = find_prog('zcat')
            or die "Couldn't find 'zcat' in PATH to read '$logfile'\n";
        # List form: the file name never passes through a shell.
        open($fh, '-|', $zcat, $logfile)
            or die "Couldn't open logfile '$zcat $logfile': $!";
    }
    else {
        open($fh, '<', $logfile)
            or die "Couldn't open logfile '$logfile': $!";
    }
    return $fh;
}

#------------------------------------------------------------------------------#
sub count_lines {
    # Return the number of lines in a (possibly gzipped) log file.
    my ($logfile) = @_;
    my $fh    = open_log($logfile);
    my $count = 0;
    while (my $ignored = <$fh>) {
        $count++;
    }
    close($fh);
    return $count;
}

#------------------------------------------------------------------------------#
sub load_stoplist {
    # Read the first line of the stoplist file (addresses separated by '|')
    # and return a compiled pattern matching exactly those addresses, or
    # undef when the file lists none.  Dies if the file cannot be opened.
    my ($file) = @_;
    open(my $fh, '<', $file) or die "Couldn't open stoplist '$file': $!";
    my $stops = <$fh>;
    close($fh);
    return unless defined $stops;
    chomp $stops;

    my @addresses;
    foreach my $address (split(/\|/, $stops)) {
        $address =~ s/^\s+//;
        $address =~ s/\s+$//;
        push @addresses, $address if length $address;
    }
    return unless @addresses;

    # Quote each entry and anchor the pattern so that "1.2.3.4" cannot match
    # "11.2.3.45" ('.' would otherwise match any character).
    my $alternatives = join('|', map { quotemeta($_) } @addresses);
    return qr/^(?:$alternatives)$/;
}

#------------------------------------------------------------------------------#
sub resolve_host {
    # Map the host field of a log entry to the lower-cased name written to the
    # output.  Dotted-quad addresses are reverse-resolved; when the lookup
    # fails, or the field is not an IPv4 address, the field itself is used.
    # Results are cached in %hosts.
    my ($host) = @_;
    return $hosts{$host} if $hosts{$host};

    my $name;
    if ($host =~ /^\d+\.\d+\.\d+\.\d+$/) {
        my $packed = inet_aton($host);  # undef for out-of-range octets
        $name = gethostbyaddr($packed, $address_type) if defined $packed;
    }
    $name = $host unless $name;

    return $hosts{$host} = lc $name;
}

#------------------------------------------------------------------------------#