#!/local/bin/perl5 -w
# Copyright (c) 1998 University of Southern California.
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed by the University of
# Southern California, Information Sciences Institute.  The name of the
# University may not be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# Program: hsurlplot.pl
#
# Purpose:
#   Reads an inter-arrival-times file (reverse sorted by hit count) and,
#   for every combination of a minimum hit count and a maximum standard
#   deviation, accumulates the percentage of URLs whose input line
#   satisfies both limits.  The result is written as a two-dimensional
#   surface suited for GNUPLOT's splot command.
#
# Usage:
#   hsurlplot.pl <input-file> <gnuplot-output-file> <unique-url-count>
#
# Input format:
#   Whitespace-separated fields, one record per line.  Field 2 (zero
#   based) is compared against the hit-count thresholds and field 5
#   against the standard-deviation thresholds.
#
# Output format:
#   One line per grid point: "<min-hits> <max-std / 86400> <percentage>",
#   with a blank line after each block of equal <min-hits>.
#

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

if (   !defined $ARGV[0]
    || !( -r $ARGV[0] )
    || !defined $ARGV[1]
    || !defined $ARGV[2] )
{
    print "\a\nRequires three arguments.\n";
    print "\t1st argument: An inter-arrival-times file reverse sorted by hit count.\n";
    print "\t2nd argument: A GNUPLOT output file.\n";
    print "\t3rd argument: Count of unique URLs.\n";

    # Exit status 0 is kept from the original script so existing callers
    # see no change.
    exit(0);
}

my ($in_path, $plot_path, $urlstotal) = @ARGV;

# The URL count is used as a divisor below, so it must be a positive number.
die "3rd argument (count of unique URLs) must be a positive number, "
  . "got '$urlstotal'.\n"
    unless looks_like_number($urlstotal) && $urlstotal > 0;

# Open the input and output files.
open(my $in_fh, '<', $in_path)
    or die "Cannot open '$in_path' for reading: $!\n";
open(my $plot_fh, '>', $plot_path)
    or die "Cannot open '$plot_path' for writing: $!\n";

# Create a vector of minimum-hit-count thresholds and a vector of
# maximum-standard-deviation thresholds.  Together they span the grid
# of filters applied to each input line.
my $low_hit_limit  = 30;
my @hit_thresholds = ();
my @std_thresholds = ();

for (my $hits = $low_hit_limit; $hits <= 300; $hits += 30) {
    push @hit_thresholds, $hits;
}
for (my $std = 8640; $std <= 86400; $std += 8640) {
    push @std_thresholds, $std;
}

# Create a hash of hashes keyed by (hit threshold, std threshold).  Each
# value is the running percentage of URLs that satisfy both thresholds.
my %pct_for = ();
for my $hit_min (@hit_thresholds) {
    for my $std_max (@std_thresholds) {
        $pct_for{$hit_min}{$std_max} = 0;
    }
}

# Every input line stands for one URL, i.e. this share of the total.
my $pct_per_url = 100 / $urlstotal;

# Iterate across the lines of the input file.  For each line, test its
# hit count and standard deviation against every grid point and add the
# line's percentage share to each grid point it satisfies.
my $linecount = 0;
LINE: while (my $line = <$in_fh>) {
    my @fields = split ' ', $line;

    # Progress indicator on standard output.
    if (!(++$linecount % 1000)) {
        print "$linecount\n";
    }

    # Skip blank or short lines rather than treating a missing hit count
    # as "below the limit", which would end processing early.
    next LINE if @fields < 6;

    my ($hits, $std) = @fields[2, 5];

    # When the source file is reverse sorted by hit count, the first line
    # below the lowest threshold marks the end of the useful data; such a
    # line cannot satisfy any grid point either.
    last LINE if $hits < $low_hit_limit;

    for my $hit_min (@hit_thresholds) {
        for my $std_max (@std_thresholds) {
            if ($hits >= $hit_min && $std <= $std_max) {
                $pct_for{$hit_min}{$std_max} += $pct_per_url;
            }
        }
    }
}

# Print the accumulated percentages as a two-dimensional surface suited
# for GNUPLOT's splot command.
for my $hit_min (@hit_thresholds) {
    for my $std_max (@std_thresholds) {
        # NOTE(review): 86400 is presumably seconds per day, so the axis
        # would be in days -- confirm against the input file's units.
        my $std_scaled = $std_max / 86400;
        print {$plot_fh} "$hit_min $std_scaled $pct_for{$hit_min}{$std_max}\n";
    }
    print {$plot_fh} "\n";
}

close($in_fh);

# Buffered write errors only surface at close, so check it.
close($plot_fh)
    or die "Error closing '$plot_path': $!\n";

exit(0);