#!/local/bin/perl5
# Copyright (c) 1998 University of Southern California.
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed by the University of
# Southern California, Information Sciences Institute.  The name of the
# University may not be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# PROGRAM: dailylog.pl <log_file> {<sort_dir>}
#
#   <log_file> -- Name of Squid proxy log file.
#   <sort_dir> -- Optional directory for sort temp files (default /tmp).
#
# Processing steps (each command is echoed to STDOUT before it runs; the
# script exits with status 1 as soon as one of them fails):
#
#   1. gcut           <log_file>       -> <log_file>.cut
#   2. gsort          <log_file>.cut   -> <base>.sorted
#   3. ccachefilter.pl <base>.sorted   -> <base>.filtered, <base>.dropped
#   4. reqmat.pl      <base>.filtered  -> <base>.filtered.reqmat,
#                                         <name>.summary.filtered
#   5. reqmat.pl      <base>.dropped   -> <base>.dropped.reqmat,
#                                         <name>.summary.dropped
#   6. reqmat.pl      <base>.sorted    -> <base>.reqmat, <name>.summary
#
# <base> is <log_file> with the first ".sanitized-access" removed and
# <name> is the log file name without directory and extension parts.
#
# WARNING: the input log file and the intermediate files are deleted as
# soon as the step that consumes them has succeeded.
#

use strict;
use warnings;

#
# Return $arg in a form that /bin/sh passes through as one literal word.
# Strings made only of harmless characters are returned unchanged, so the
# echoed command lines look the same as before for ordinary file names.
#
sub shell_quote {
    my ($arg) = @_;

    return $arg if $arg =~ m{\A[A-Za-z0-9_./+:\@%,=-]+\z};

    # Single quotes protect everything except a single quote itself,
    # which has to be closed, escaped and reopened.
    (my $quoted = $arg) =~ s/'/'\\''/g;
    return "'$quoted'";
}

#
# Echo and run one external command.
#
#   $argv        -- array ref: program name followed by its arguments.
#   $stdout_file -- optional file that receives the command's STDOUT.
#
# Returns the empty string on success, otherwise a short description of
# how the command failed.  The description is derived from the wait
# status ($?); $! is only meaningful when the command could not be
# started at all.
#
sub run_step {
    my ($argv, $stdout_file) = @_;

    my $command = join ' ', map { shell_quote($_) } @{$argv};
    $command .= ' > ' . shell_quote($stdout_file) if defined $stdout_file;

    print "$command\n";
    my $status = system($command);

    return '' if $status == 0;
    return "could not be started: $!" if $status == -1;
    return 'was killed by signal ' . ($status & 127) if $status & 127;
    return 'exited with status ' . ($status >> 8);
}

#
# Delete a file that is no longer needed.  A failure is reported on
# STDERR but is not fatal: cleanup is best effort.
#
sub remove_file {
    my ($path) = @_;

    unlink($path) or warn "dailylog.pl: unable to delete $path: $!\n";
    return;
}

if ( !defined($ARGV[0]) ) {
    print "\n\aERROR: In dailylog.pl, arguments are incorrect.\n";
    print "Called as: dailylog.pl <log_file> {<sort_dir>}\n";
    print "    <log_file> -- Name of Squid proxy log file.\n";
    print "    <sort_dir> -- Optional directory for sort() temp files.\n\n";
    exit(1);
}

my $log_file = $ARGV[0];
my $sort_dir = defined($ARGV[1]) ? $ARGV[1] : "/tmp";

#
# Get filename prefix from <log_file>.
#
my $log_name = $log_file;
$log_name =~ s/.*\///;      # get rid of path parts
$log_name =~ s/\..*//;      # get rid of extension parts

#
# Work out the names of all intermediate and result files up front.
# Every derived name is built from $dst_base, so a directory or file name
# that happens to contain "sorted", "filtered" or "dropped" is never
# rewritten by accident.
#
my $cut_file = "$log_file.cut";

my $dst_sort = "$log_file.sorted";
$dst_sort =~ s/\.sanitized-access//i;

(my $dst_base = $dst_sort) =~ s/\.sorted\z//;

my $dst_filter          = "$dst_base.filtered";
my $dst_drop            = "$dst_base.dropped";
my $dst_reqmat_filtered = "$dst_base.filtered.reqmat";
my $dst_reqmat_dropped  = "$dst_base.dropped.reqmat";
my $dst_reqmat          = "$dst_base.reqmat";

my $reason;

#
# Cut out unneeded fields of log records.
#
# NOTE: GNU version of cut, gcut, is used in place of cut.
#       cut has a line length limit that is sometimes too small.
#       Ridiculously long URLs are sometimes specified by buggy
#       browsers.
#
$reason = run_step([ 'gcut', '-f', '1,3,4,5,7', '-d', ' ', $log_file ],
                   $cut_file);
if ($reason) {
    print "\n\aIn dailylog.pl unable to cut file: $log_file.\n";
    print "Error: gcut $reason.\n\n";
    exit(1);
}

#
# Delete the uncut log file.
#
remove_file($log_file);

#
# Sort the cut log file by URL.
#
# NOTE: The GNU version of the sort() program, gsort(), is used.
#       Due to browser bugs, URL specifications can sometimes be as
#       long as several thousand characters.  The standard sort()
#       program cannot deal with that, whereas gsort() can.
#
# NOTE: "+4" is the obsolete zero-origin key syntax (skip four fields,
#       i.e. what newer sort versions spell "-k 5").  It is kept as is
#       for compatibility with the installed gsort.
#
$reason = run_step([ 'gsort', '+4', '-o', $dst_sort, '-T', $sort_dir,
                     $cut_file ]);
if ($reason) {
    print "\n\aIn dailylog.pl unable to sort file: $cut_file\n";
    print "Error: gsort $reason.\n\n";
    exit(1);
}

#
# Delete the unsorted cut log file.
#
remove_file($cut_file);

#
# Filter out of the sorted log file those references that would not be
# made if client caches were all operating and all URLs were cacheable.
#
if (run_step([ 'ccachefilter.pl', $dst_sort, $dst_filter, $dst_drop ])) {
    print "\n\aIn dailylog.pl ccachefilter.pl failed for: $dst_sort.\n";
    exit(1);
}

#
# Build the request matrix file for the filtered, sorted log file.
#
if (run_step([ 'reqmat.pl', $dst_filter, $dst_reqmat_filtered,
               "$log_name.summary.filtered" ])) {
    print "\n\aIn dailylog.pl reqmat.pl failed for: $dst_filter.\n";
    exit(1);
}

#
# Delete the filtered log file.
#
remove_file($dst_filter);

#
# Build the request matrix file for the dropped, sorted log file.
#
if (run_step([ 'reqmat.pl', $dst_drop, $dst_reqmat_dropped,
               "$log_name.summary.dropped" ])) {
    print "\n\aIn dailylog.pl reqmat.pl failed for: $dst_drop.\n";
    exit(1);
}

#
# Delete the dropped log file and the dropped.reqmat file.
# Keep the summary.dropped file.
#
remove_file($dst_reqmat_dropped);
remove_file($dst_drop);

#
# Build the request matrix file for the non-filtered sorted log file.
#
if (run_step([ 'reqmat.pl', $dst_sort, $dst_reqmat,
               "$log_name.summary" ])) {
    print "\n\aIn dailylog.pl reqmat.pl failed for: $dst_sort.\n";
    exit(1);
}

#
# Delete the non-filtered file.
#
remove_file($dst_sort);

exit(0);