#!/local/bin/perl5
# Copyright (c) 1998 University of Southern California.
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed by the University of
# Southern California, Information Sciences Institute.  The name of the
# University may not be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# PROGRAM: monthly.pl
#
# Called as: monthly.pl PREFIX FIRST_DATE LAST_DATE TMP_DIR
#
#
# This program performs the end-of-month processing of request-matrix files
# associated with the site PREFIX.
#
# This is done for the PREFIX.*.reqmat, PREFIX.*.filtered.reqmat and
# PREFIX.*.dropped.reqmat file groups.  The result is a single monthly
# reqmat file for each file group.  The corresponding summary files are
# updated to reflect data for the month.
#
# Once the monthly processing for each file group has been accomplished,
# the original files are deleted, leaving only the combined monthly
# request-matrix and summary file for the group.
#
#
# PREFIX     --- Indicates a set of related files: PREFIX.*.reqmat.
#
# FIRST_DATE --- Date string of the form: YYMMDD.  This signifies
#                the first day of that month for which a reqmat file exists.
#
# LAST_DATE  --- Date string of the form: YYMMDD.  This signifies
#                the last day of that month for which a reqmat file exists.
#
# TMP_DIR    --- Directory that the sort() program uses as a
#                temporary repository for partially sorted results.
#
# NOTE: The sorts done here assume that each daily reqmat file has been
#       already sorted, allowing the sort() program to skip individual
#       file sorting and immediately begin a multi-file merge.  These
#       sorts can also easily require more than 1GB of free space.
#
# NOTE: The GNU version of the sort() program, gsort(), is used.
#       Due to browser bugs, URL specifications can sometimes be as
#       long as several thousand characters.  The standard sort()
#       program cannot deal with that, whereas gsort() can.
#

use strict;
use warnings;

##################################################################
# fail ($message)
#
# Print $message on STDOUT in this script's alert format (a leading
# blank line and a terminal bell) and terminate with exit status 1.
# Plain print is used rather than printf so that a '%' in a file name
# is never interpreted as a format directive.
#
sub fail {
    my ($message) = @_;

    print "\n\a$message\n";
    exit(1);
}

##################################################################
# run_command ($failure_message, @command)
#
# Echo @command on STDOUT, then run it.  The list form of system() is
# used so the arguments are handed to the program directly and never
# re-parsed by a shell.  Any non-zero status (including failure to
# start the program) ends the script through fail($failure_message).
#
sub run_command {
    my ($failure_message, @command) = @_;

    print "@command\n";
    if (system { $command[0] } @command) {
        fail($failure_message);
    }
    return;
}

##################################################################
# process_group ($prefix, $first_date, $last_date, $tmp_dir, $infix)
#
# Perform the end-of-month processing for one group of daily files.
# $infix selects the group: '' for PREFIX.YYMMDD.reqmat, '.filtered'
# for PREFIX.YYMMDD.filtered.reqmat, and so on.
#
# Steps, in order:
#   1. merge the daily files with gsort into a temporary ".sorted" file;
#   2. run cmbreqmat.pl on the sorted file to produce the monthly
#      reqmat file, passing it the group's summary file;
#   3. rename the summary file to its monthly name;
#   4. delete the daily files and the temporary sorted file;
#   5. gzip the monthly reqmat file.
#
# Any failure in steps 1-3 or 5 terminates the script with status 1.
# The daily files are only deleted once steps 1-3 have succeeded.
#
sub process_group {
    my ($prefix, $first_date, $last_date, $tmp_dir, $infix) = @_;

    my $month_stem    = "$prefix.$first_date-$last_date";
    my $sorted_file   = "$month_stem$infix.sorted";
    my $output_file   = "$month_stem$infix.reqmat";
    my $summary_file  = "$prefix.summary$infix";
    my $month_summary = "$month_stem.summary$infix";

    #
    # Create list of daily reqmat files to be merged.
    #
    my @daily_files =
        glob("$prefix.[0-9][0-9][0-9][0-9][0-9][0-9]$infix.reqmat");

    # With no input files gsort would read standard input instead,
    # so stop here rather than hang or build an empty monthly file.
    if (!@daily_files) {
        fail("In monthly.pl no files found for: $prefix.*$infix.reqmat.");
    }

    #
    # Merge them by URL string.  "+0" is the historical zero-based key
    # syntax accepted by gsort; it is passed through unchanged.
    #
    run_command(
        "In monthly.pl sort failed for: $prefix.*$infix.reqmat.",
        'gsort', '-m', '+0', '-o', $sorted_file, '-T', $tmp_dir,
        @daily_files
    );

    #
    # Create monthly combined reqmat file.
    #
    run_command(
        "In monthly.pl cmbreqmat failed for: $sorted_file.",
        'cmbreqmat.pl', $sorted_file, $output_file, $summary_file
    );

    #
    # Rename summary file.  A failure here is fatal so that the daily
    # files are never deleted while the summary is in an unknown state.
    #
    if (!rename($summary_file, $month_summary)) {
        fail("In monthly.pl rename failed for: $summary_file ($!).");
    }

    #
    # Delete all the daily reqmat files and the sorted file.  A file
    # that cannot be removed is reported but does not abort the run.
    #
    for my $stale_file (@daily_files, $sorted_file) {
        if (!unlink($stale_file)) {
            print "In monthly.pl could not delete $stale_file: $!\n";
        }
    }

    #
    # Compress the reqmat file.
    #
    run_command(
        "In monthly.pl gzip failed for: $output_file.",
        'gzip', $output_file
    );

    return;
}

##################################################################
# Main program.
#
if (@ARGV < 4) {
    fail("Usage: monthly.pl prefix first_date last_date tmp_dir");
}

my ($prefix, $first_date, $last_date, $tmp_dir) = @ARGV;

#
# Filtered reqmat files.
#
process_group($prefix, $first_date, $last_date, $tmp_dir, '.filtered');

#
# Filtered and dropped reqmat files.
#
# NOTE: For now these reqmat files are not being kept.  Only the
#       summary file is kept.  Re-enable the call below to process them.
#
#process_group($prefix, $first_date, $last_date, $tmp_dir, '.dropped');

#
# Non-filtered reqmat files.
#
process_group($prefix, $first_date, $last_date, $tmp_dir, '');

exit(0);