#!/usr/bin/perl
#
# stoic.pl - web server log analysis tool thingy
# harrison@area.com, dell@area.com
#
# reports:
#
#   - logfile totals (hits, bytes transferred, date ranges, more.)
#   - daily totals (hits, bytes transferred, unique sites)
#   - hourly totals (hits per hour, percent)
#   - domains visiting (includes definition)
#   - top n documents requested
#   - top n sites visiting
#   - status codes received (with notes)
#   - agent statistics (browser usage, broken down by known browsers)
#   - platform statistics (broken down by known platforms)
#   - robots visiting
#   - user site report (list of users, sites, hits, bytes on each)
#   - user report (hits, bytes, last access)
#
# miscellaneous features:
#
#   - aol proxies broken down into one: *.proxy.aol.com
#   - major search engine requests are parsed
#   - ignores sites listed in specified file (e.g. your own, IP address, etc.)
#   - referrals are only collected if specified with -r switch
#   - country codes are matched with domains visiting
#   - browsers and platform report
#   - status codes are explained
#   - totals for entire logfile, hits, bytes, hits to home page, more.
#   - bad lines in logfile can be dumped to a file
#
# to-do:
#
#   - everything gets written to STDOUT now, individual files later (?)
#   - switch to choose, order report output
#   - boy that danny kaye sure can tap dance
#   - ignore referrals from sites (switch pointing to file? use ignore list?)
#
# notes:
#
#   - domains.txt should be in the same directory as this script,
#     otherwise specify path with -a.  a copy of domains.txt is
#     at http://www.area.com/harrison/domains.txt
#   - the only required argument:
#       stoic -l [path-to-logfile]
#     e.g.
#       stoic -l /home/web/mysite/logs/access
#   - authorized users file should be in .htaccess format
#   - all other switches alter the output slightly:
#       -a must be specified to show user reports
#       -c defaults to ./domains.txt if not specified
#       -d, -h, -r and -s default at 10
#       -e defaults to all
#       -i FILE excludes domains listed in FILE (like your machine)
#       -n SITENAME puts the SITENAME in the totals report
#
# switches:
#
#   -a [path]  path to list of authorized users
#   -b [path]  path to database (for running totals)
#   -c [path]  path to country codes file
#   -d [n]     top n documents accessed, default is 10
#   -e [path]  path to search engine hits
#   -f [path]  path to failed input
#   -h [n]     top n hosts, default is 10 (top level domains, .com, .edu, etc)
#   -i [path]  path to ignore file
#   -l [path]  path to logfile
#   -n [name]  name of this site (e.g. www.area.com)
#   -p         post-process referral output
#   -r [path]  path to referral output
#   -s [n]     top n sites visiting, default is 10
#   -t [path]  path to timing output
#   -u         unique sites broken down by date
#   -v         dean stark switch
#   -x         interesting debug stuff
#   -y         optionally tie aa's to dbm files (testing only)
#   -z [duh]   special cases
#
# NOTE(review): "use strict" is not enabled.  The script keeps its state in
# package globals ($opt_*, %HOSTS, %DOCUMENTS, ...) that the report subs read
# directly; converting all of them is a separate piece of work.

use Getopt::Std;

# getopt() (not getopts()) is what the script has always called: every
# character in the spec is a switch that takes an argument, and any other
# switch (-p, -v, -x, -y) is accepted as a boolean.  Results land in $opt_X.
getopt ('a:b:c:d:e:f:h:i:l:n:r:s:t:u:z:');

my $version = "v1.2a";

$|++;

if (! $opt_l) {
    print "The -l option should provide the path to the logfile.\n"
        . "(This is the only option that is required.)\n\n";
    usage ();
    exit 1;
}

print STDERR "stoic $version\n" if ($opt_x);

############
# defaults #
############

$opt_d = 10 if (! $opt_d);      # top n documents accessed
$opt_h = 10 if (! $opt_h);      # top n hosts visiting
$opt_s = 10 if (! $opt_s);      # top n sites (defaults to top ten)
$opt_c = "./domains.txt" if (! $opt_c && -f "./domains.txt");

#############
# databases #
#############

if ($opt_y) {
    if ($opt_z && $opt_z eq "pornopolis") {
        dbmopen (%SECTIONS, "./sections", 0770)
            or warn ("can't open dbm ./sections, $!");
        # remove undef to keep running totals
        undef %SECTIONS;
    }
}

# running totals:
#
#   logfile size        (logsize)
#   hits                (hits)
#   hits to main page   (hits-main)
#   bytes transferred   (bytes)
#   unique sites        (sites)
#   time running        (time)

if ($opt_b) {
    # best effort: on failure %RUNNING is just an ordinary in-memory hash
    dbmopen (%RUNNING, "./$opt_b", 0770)
        or warn ("can't open dbm ./$opt_b, $!");
}

###########
# globals #
###########

my $start             = time;
my $start_time        = logdate ();
my $hits              = 0;
my $lines             = 0;
my $hits_to_home_page = 0;
my $total_documents   = 0;
my $total_hosts       = 0;
my $total_referrals   = 0;
my $ignored           = 0;
my $total_bytes       = 0;
my $fail              = 0;      # count of rejected logfile lines
my $failed;                     # path of the -f dump file, if any
my $access            = $opt_l;
my $start_date;
my $start_date_nice;
my $end_date;
my $now;                        # "dd-Mon-yy" of the line last processed
my $last_time_around;           # time of the previous -x debug report
my @date_order;

# month abbreviation (as it appears in the log) => month number,
# used to build the report file name
my %MONTH_NUMBER = (
    jan => 1, feb => 2,  mar => 3,  apr => 4,
    may => 5, jun => 6,  jul => 7,  aug => 8,
    sep => 9, oct => 10, nov => 11, dec => 12,
);

# browsers that do not claim to be "compatible"; each entry is used as a
# regex against the agent string, first match wins
my @BROWSERS_REDUX = (
    "IBM",
    "Lynx",
    "Lotus-Notes/4",
    "Mozilla/0",
    "Mozilla/1",
    "Mozilla/2",
    "Mozilla/3",
    "Mozilla/4",
    "MSIE 4.0",
    "MSIE/2",
    "MSIE/3",
    "Opera",
    "AOL-IWENG",
    "HotJava",
    "Cyberdog",
    "IBrowse",
    "AmigaVoyager",
);

# regex => report name, for agents of the form "X (compatible; ...)".
# keys are single-quoted so that backslashes reach the regex engine
# (in double quotes "\d" and "\." lose their backslash).
%BROWSER_SHORTHAND = (
    'InfoSeek Sidewinder'           => "InfoSeek Sidewinder",
    'PRODIGY'                       => "Prodigy Web Browser",
    'MSProxy/2'                     => "[MSProxy v2]",
    'MSIE 2\.'                      => "MSIE/2",
    'MSIE 3\.'                      => "MSIE/3",
    'MSIE 4\.'                      => "MSIE 4.0",
    'Microsoft Internet Explorer/4' => "MSIE 4.0",
    'SEGA Saturn'                   => "SEGA Saturn",
    'AOL \d+\.0'                    => "AOL browser",
    'Quarterdeck Mosaic'            => "Quarterdeck Mosaic",
);

# regex (matched case-insensitively) => platform name
%PLATFORM_SHORTHAND = (
    'Windows;'           => "Microsoft Windows 3.x",
    'Windows 3.1'        => "Microsoft Windows 3.x",
    'Windows 16'         => "Microsoft Windows 3.x",
    'Windows NT'         => "Microsoft Windows NT",
    'Win16'              => "Microsoft Windows 3.x",
    'Win32'              => "Microsoft Windows 95 or NT",
    'WinNT'              => "Microsoft Windows NT",
    'Win95'              => "Microsoft Windows 95",
    'Windows 95'         => "Microsoft Windows 95",
    '(Windows)'          => "Microsoft Windows 3.x",
    'mac'                => "Apple Macintosh",
    'PPC'                => "Apple Macintosh",
    'Cyberdog'           => "Apple Macintosh",
    'Java'               => "Java",
    'NetBSD'             => "NetBSD",
    'FreeBSD'            => "FreeBSD",
    'BSD/OS'             => "BSD/OS",
    'SunOS'              => "SunOS",
    'HP-UX'              => "HP-UX",
    'IRIX'               => "IRIX",
    'DOSlynx'            => "MS/DOS",
    'OmniWeb'            => "NeXT OpenStep",
    'Windows x86'        => "Microsoft Windows 3.x",
    'OpenVMS'            => "OpenVMS",
    'wget'               => "wget",
    'AIX'                => "AIX",
    'Linux 1'            => "Linux 1.x",
    'Linux 2'            => "Linux 2.x",
    'PCN'                => "PointCast Network",
    'Amiga'              => "Amiga",
    'OS/2'               => "OS/2",
    'IBM[- ]WebExplorer' => "OS/2",
    'WebTV'              => "WebTV",
    'OSF1'               => "OSF1",
    'sega'               => "Sega Saturn",
);

# Only the first matching platform pattern is counted, so the order the
# patterns are tried in decides the result.  Hash order is arbitrary; try
# the longest (most specific) patterns first so that e.g. "Windows 95" is
# tested before the catch-all "(Windows)".
my @PLATFORM_PATTERNS =
    sort { length ($b) <=> length ($a) || $a cmp $b } keys %PLATFORM_SHORTHAND;

my %STATUS_CODES = (
    "200" => "Success",
    "202" => "Accepted",
    "203" => "Partial Information",
    "204" => "No response",
    "301" => "Document moved",
    "302" => "Forward",
    "304" => "Not Modified",
    "400" => "Bad request",
    "401" => "Unauthorized",
    "402" => "Payment Required",
    "403" => "Forbidden",
    "404" => "Not Found",
    "500" => "Internal Server Error",
    "501" => "Not Implemented",
);

# $opt_i points to a file containing
# a list of sites to ignore

if ($opt_i) {
    open (IGNORE, '<', $opt_i) or die ("can't open $opt_i, $!");
    while (my $site = <IGNORE>) {
        chomp $site;            # before the length test, so blank lines are skipped
        next if ($site =~ /^#/);
        next if (! length ($site));
        $IGNORE_SITES{$site}++;
    }
    close IGNORE;
}

# -c points to country code file
# format: code country

if ($opt_c) {
    open (COUNTRIES, '<', $opt_c) or die ("can't open $opt_c, $!");
    while (my $entry = <COUNTRIES>) {
        chomp $entry;
        next if (! length ($entry));
        my ($code, $country) = split (/\s+/, $entry, 2);
        $code = lc ($code);
        $COUNTRY{$code} = $country;
    }
    close COUNTRIES;
}

# -a points to list of authorized users

if ($opt_a) {
    open (HTACCESSFILE, '<', $opt_a) or die ("$opt_a not found, $!");
    while (my $entry = <HTACCESSFILE>) {
        chomp $entry;
        next if (! length ($entry));
        my ($who) = split (/:/, $entry);
        $AUTH_USERS{$who}++;
    }
    close (HTACCESSFILE);
}

# -f points to where you want bad output
# from the logfile dumped

if ($opt_f) {
    $failed = $opt_f;
    open (FAILED, '>', $failed) or die ("can't write to $failed, $!");
}

# -r points to referral log files

if ($opt_r) {
    open (REFERRALS, '>', $opt_r) or die ("can't write to $opt_r, $!");
}

# -e points to search engine hits

if ($opt_e) {
    open (ENGINE_HITS, '>', $opt_e) or die ("can't write to $opt_e, $!");
}

# process logfile
#
# NOTE(review): this open is deliberately left in 2-arg form so that
# invocations passing "-" or a pipe ("zcat log.gz |") to -l keep working;
# confirm whether anyone relies on that before tightening it.

open (FILE, "$access") or die ("can't open $access, $!");

while (my $line = <FILE>) {
    chomp $line;
    $lines++;

    # absurdly long lines are treated as bad input
    if (length ($line) > 800) {
        $fail++;
        print FAILED "$line\n" if ($failed);
        next;
    }

    # combined log format first ...
    my ($host, $remote_user, $auth_user, $date, $request, $status, $bytes,
        $referral, $agent) = $line =~
        m/^(\S+) (\S+) (\S+) \[([^\]\[]+)\] \"([^"]*)\" (\S+) (\S+) \"?([^"]*)\"? \"([^"]*)\"$/;

    # ... then common log format (no referral, no agent)
    if (! $host) {
        ($host, $remote_user, $auth_user, $date, $request, $status, $bytes) =
            $line =~
            m/^(\S+) (\S+) (\S+) \[([^\]\[]+)\] \"([^"]*)\" (\S+) (\S+)$/;
    }

    # line in access file failed (for some reason)
    if (! $host) {
        $fail++;
        print FAILED "$line\n" if ($failed);
        next;
    }

    # throw away netscape log header/definition/thingy
    next if (! $hits && $host =~ /^format=/);

    # skip hosts we want to ignore
    if ($opt_i && $IGNORE_SITES{$host}) {
        $ignored++;
        next;
    }

    # special hosts we want to wrap into one
    # (fucking aol proxies..)
    if ($host =~ /proxy\.aol\.com/) {
        $host = "*.proxy.aol.com";
    }

    ###################
    # data collection #
    #   starts here   #
    ###################

    $hits++;
    $total_bytes += $bytes;

    ####################
    # debugging output #
    ####################

    if ($opt_x && $hits % 250 == 0) {
        print STDERR ".";
    }

    if ($opt_x && $hits % 1000 == 0) {
        print STDERR "\nstoic: " . commas ($lines) . " lines ["
            . commas ($hits) . " hits " . amount ($total_bytes) . "]\n";
        if ($last_time_around) {
            print STDERR " seconds between debug report: "
                . (time - $last_time_around) . "\n";
        }
        # $now still holds the date of the previous line at this point
        print STDERR " pid=$$ current=$now last=$host\n";
        print STDERR " hosts=" . commas ($total_hosts)
            . " documents=" . commas ($total_documents)
            . " referrals=" . commas ($total_referrals) . "\n";
        print STDERR " start=$start_date_nice\n"
            . " now=" . logdate () . "\n"
            . " elapsed=" . commas ((time - $start)) . " seconds\n\n";
        $last_time_around = time;
    }

    # domains
    my ($domain) = $host =~ m/[A-Z1-9].*\.(\S+)$/i;
    $domain = lc $domain;
    $DOMAINS{$domain}++ if ($domain !~ /^\d+$/);

    # start and end of log
    $start_date      = $date if (! $start_date);
    $start_date_nice = logdate () if (! $start_date_nice);
    $end_date        = $date;

    # unique hosts
    $HOSTS{$host}++;
    $total_hosts++;

    # request
    #
    # "GET /bullfrog/bigfrog.gif HTTP/1.0"

    my $file;

    # parse file
    if ($status == 200 && $request ne "-") {
        ($file) = $request =~ m/^\S+ (\S+) \S+$/;
        $file = lc ($file);

        if (! $file) {
            # this doesn't happen very often at all
            $fail++;
            print FAILED "$line\n" if ($failed);
            next;
        }
        elsif ($file !~ /cgi-bin/ &&
               $file !~ /\.gif/   &&
               $file !~ /\.jpg/   &&
               $file !~ /\.jpeg/  &&
               $file !~ /\.xbm/) {
            $DOCUMENTS{$file}++;
            if ($file eq "/" || $file =~ /index.[s]?htm[l]?/i) {
                $hits_to_home_page++;
            }
            $total_documents++;
        }
    }

    # status
    $STATUS{$status}++;

    # date stuff
    #
    # 13/May/1997:11:28:40 -0700

    my ($day, $mon, $year, $hour) =
        $date =~ m/(\d+)\/(\S+)\/(\d+):(\d+):\d+:\d+.*/;

    $HOUR_OF_DAY{$hour}++;

    $now = sprintf "%02d-%s-%s", $day, $mon, substr ($year, 2, 2);

    # first hit for this date: remember the order dates appear in
    push (@date_order, $now) if (! exists $DAILY_HITS{$now});

    $DAILY_HITS{$now}++;
    $DAILY_BYTES{$now} += $bytes;
    $DAILY_HOSTS{$now}{$host}++;

    # daily statistics
    if ($opt_u) {
        push (@DAILY_ORDER, $now) if (! exists $DAILY_VISITORS{$now});
        $DAILY_VISITORS{$now}{$host}++;
    }

    # user statistics
    #
    # counts on parse information from request
    # and date stuff
    # above, so don't move this
    # code around.

    if ($opt_a && $AUTH_USERS{$auth_user}) {
        $USER_BYTES{$auth_user} += $bytes;
        $USER_HITS{$auth_user}++;
        $USER_LAST{$auth_user} = $now;
        $USER_SITES_HITS{$auth_user}{$host}++;
        $USER_SITES_BYTES{$auth_user}{$host} += $bytes;
    }

    # i suppose this tracks the number of
    # pictures one looks at, but, duh?

    if ($opt_z && $opt_z eq "pornopolis" && $file !~ /thumb/) {
        if ($file =~ /view\.pl/) {
            $USER_HITS{$auth_user}++;
        }
    }

    # browser stuff

    # MSIE and other browsers mask themselves as "compatible"
    # with Mozilla.  we try to catch them and parse them out.

    if ($agent =~ /compatible/) {
        my ($compatible, $browser) = $agent =~ m/(\S+) \(compatible; (.*)/i;
        # every matching shorthand is counted (there is no "first wins" here)
        foreach my $compat (keys %BROWSER_SHORTHAND) {
            if ($browser =~ /$compat/) {
                $BROWSERS{$BROWSER_SHORTHAND{$compat}}++;
            }
        }
    }
    elsif ($agent) {
        my $b_match = 0;
        foreach my $known (@BROWSERS_REDUX) {
            if ($agent =~ /$known/) {
                $BROWSERS{$known}++;
                $b_match = 1;
                last;
            }
        }
        $BROWSERS{'other'}++ if (! $b_match);
    }

    # platforms

    if ($agent && $agent ne "-" && $request !~ /robots.txt/i) {
        my $platform_identified = 0;
        foreach my $pattern (@PLATFORM_PATTERNS) {
            if ($agent =~ /$pattern/i) {
                $PLATFORMS{$PLATFORM_SHORTHAND{$pattern}}++;
                $platform_identified = 1;
                last;
            }
        }

        if (! $platform_identified) {
            if ($agent =~ /Lynx/) {
                $PLATFORMS{'unknown [using lynx]'}++;
            }
            elsif ($agent) {
                $PLATFORMS{'miscellaneous / unknown'}++;
            }
            else {
                $PLATFORMS{'unknown'}++;
            }
        }
    }

    # referrals
    #
    # the -n test is skipped when no site name was given: an empty
    # interpolated pattern would make perl reuse the last successful regex.

    if ($opt_r &&
        $referral &&
        !($opt_n && $referral =~ /\Q$opt_n\E/i) &&
        $referral ne "-" &&
        $referral !~ /[\@\#]/ &&        # no embedded usernames or named links, plz.
        !is_local_url ($referral))      # do special things inside &is_local_url, plz.
    {
        my ($string, $matched);

        # strip off quotes around this (some browsers..)
        $referral =~ s/^\"//g;
        $referral =~ s/\"$//g;
        $referral = lc ($referral);

        $total_referrals++;

        # search engine?
        if ($referral =~ /\?/) {
            # NOTE(review): $matched is set below but never read anywhere
            $matched = 0;

            # before and after the "?"
            my ($chaff, $query) = ( $referral =~ m/(.+)\?(.+)/ );

            my @pieces = split (/\//, $chaff);

            # http://host/path splits into ("http:", "", "host", ...)
            if ($pieces[2] && $pieces[2] =~ /\./) {
                $ENGINES{$pieces[2]}++;
            }

            # get pornopolis queries out of the report
            # this happens when IP# is used instead of domain name
            if ($chaff =~ /view\.pl/ || $chaff =~ /index\.pl/) {
                $matched++;
            }

            # the referral was lowercased above, so "MT=" arrives as "mt="
            foreach my $item (split (/&/, $query)) {
                next if ($item !~
                    /^(?:q|query|search|searchtext|qt|general|s|p|text|mt)=/);

                (undef, $string) = split (/=/, $item);
                $string = "" if (! defined $string);
                $string =~ s/%([\dA-Fa-f][\dA-Fa-f])/pack ("C", hex ($1))/eg;
                $string =~ s/\+/ /g;
                $string =~ s/^\'//g;
                $string =~ s/\"//g;
                $string =~ s/^\s+//g;
                chomp $string;
                next if ($string =~ /\d+/ || length ($string) < 2);
                # ENGINE_HITS is only open when -e was given
                print ENGINE_HITS "$string\n" if ($opt_e);
                $matched++;
                last;
            }
        }
        else    # not a search engine, an actual link
        {
            print REFERRALS "$referral\n";
        }
    }

    # robots
    if ($file eq "/robots.txt" && $agent !~ /compatible/ && $agent ne "-") {
        $ROBOTS{$agent}++;
        $ROBOTS_LAST{$agent} = $now;
    }

    #######################
    # site-specific cases #
    #######################

    if ($opt_v) {
        foreach my $cs (qw (surf209 enclave.org)) {
            if ($host =~ /$cs/) {
                $CS_SITES{$host}++;
                $CS_SITES_LAST{$host} = $now;
            }
        }
    }

    if ($opt_z && $opt_z eq "pornopolis") {
        # /cgi-bin/view.pl?collection=current&dir=male&file=d09.jpg
        if ($file =~ m#^/cgi-bin/view.pl\?collection=(\S+)&dir=(\S+)&file=.*$#) {
            my ($collection, $section) = ($1, $2);
            if ($collection eq "smut" ||
                $collection eq "archive" ||
                $collection eq "exhibits") {
                $section = "$collection/$section";
            }
            if ($collection eq "wow" || $collection eq "current") {
                $section = "$collection/*";
            }
            if ($collection =~ /^save\//) {
                $section = "save/*";
            }
            $SECTIONS{$section}++;
        }
    }
    elsif ($opt_z && $opt_z eq "rotten") {
        if ($file =~ /gallery/) {
            my (@pieces) = split (/\//, $file);
            $SECTIONS{$pieces[$#pieces-1]}++
                unless ($pieces[$#pieces-1] eq "gallery");
        }
    }

    # done with loop
}

close FILE;
close FAILED if ($failed);

# flush the output files now: -p reads the referral file back in below
close REFERRALS   if ($opt_r);
close ENGINE_HITS if ($opt_e);

print STDERR "\n\nstoic: file complete.\n\n" if ($opt_x);

@REPORTS = qw (totals daily hourly documents domains
               sites status agent platforms robots);

push (@REPORTS, "user_sites", "user_report") if ($opt_a);
push (@REPORTS, "referral") if ($opt_r);
push (@REPORTS, "popularity")
    if ($opt_z && ($opt_z eq "pornopolis" || $opt_z eq "rotten"));
push (@REPORTS, "vampyre") if ($opt_v);
push (@REPORTS, "daily_visitors") if ($opt_u);

# report file name: YYYYMMDD-YYYYMMDD-name, from the first and last log
# dates and the middle label of -n (www.area.com => area)

my ($uname)        = $opt_n     =~ /^\S+\.(\S+)\.\S+$/;
my ($d1, $m1, $y1) = $start_date =~ /^(\d+)\/(\S+)\/(\d{4})/;
my ($d2, $m2, $y2) = $end_date   =~ /^(\d+)\/(\S+)\/(\d{4})/;
my $filename = sprintf "%04d%02d%02d-%04d%02d%02d-%s",
    $y1, month_number ($m1), $d1,
    $y2, month_number ($m2), $d2,
    $uname;

open (OUT, '>>', "$filename.txt")
    or die ("can't write to $filename.txt, $!");

my $link_r = "00-latest-report.txt";
replace_symlink ("$filename.txt", $link_r);

foreach my $report (@REPORTS) {
    print STDERR "stoic: writing report $report ..\n\n" if ($opt_x);
    # each report is a sub of the same name taking the output filehandle
    my $writer = \&{$report};
    $writer->(\*OUT);
    print OUT "\n";
}

close OUT;

dbmclose (%RUNNING) if ($opt_b);

if ($opt_p) {
    open (REF_RAW, '<', $opt_r) or die "duh: can't open $opt_r ??";

    while (my $url = <REF_RAW>) {
        chomp $url;
        $REFERRAL{$url}++;
    }

    close REF_RAW;

    my $s = "$m1/$d1/$y1";
    my $e = "$m2/$d2/$y2";

    open (REF_HTML, '>>', "ref-$filename.html")
        or die ("can't write to ref-$filename.html, $!");

    my $link_f = "00-latest-referrals.html";
    replace_symlink ("ref-$filename.html", $link_f);

    # NOTE(review): the HTML tags of the three heredocs below were lost from
    # the copy of the script this was rebuilt from; only their text survived.
    # The markup here is a minimal reconstruction - compare against a
    # previously generated report if the exact layout matters.

    print REF_HTML <<FIN;
<html>
<head><title>referrals for $filename</title></head>
<body>
<h1>$uname: $s to $e</h1>
<table>
FIN

    my $t = 0;

    foreach my $url (sort { $REFERRAL{$b} <=> $REFERRAL{$a} } keys %REFERRAL) {
        # referrers come straight from the log, so escape them
        my $safe = html_escape ($url);
        print REF_HTML <<FIN;
<tr><td align="right">$REFERRAL{$url}</td> <td><a href="$safe">$safe</a></td></tr>
FIN
        $t += $REFERRAL{$url};
    }

    print REF_HTML <<FIN;
<tr><td align="right">Total:</td> <td>$t referrals</td></tr>
</table>
<hr>
stoic.pl 1.2a<br>
Area Systems Confidential<br>
bugs to staff\@area.com
</body>
</html>
FIN

    close REF_HTML;
}

exit 0;     # done

# print the (very short) usage message
sub usage
{
    print "stoic $version\n";
}

#####################
# support functions #
#####################

# current local time as "MM/DD/YY HH:MM".  the year is reduced to two
# digits; localtime returns years since 1900, and totals() relies on the
# fixed width of this string when it cuts the time out with substr.
sub logdate
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime (time);
    return sprintf ("%02d/%02d/%02d %02d:%02d",
                    $mon + 1, $mday, $year % 100, $hour, $min);
}

# current local date as "M/D/YY"
sub nicedate
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime (time);
    return sprintf ("%d/%d/%02d", $mon + 1, $mday, $year % 100);
}

# month abbreviation from the log ("May") => month number (5).
# returns 0 for anything unrecognised, including undef.
sub month_number
{
    my ($name) = @_;
    return 0 if (! defined $name);
    return $MONTH_NUMBER{lc (substr ($name, 0, 3))} || 0;
}

# point symlink $link at $target, replacing whatever was there.
# -l is tested as well as -e because -e is false for a dangling symlink.
sub replace_symlink
{
    my ($target, $link) = @_;
    unlink ($link) if (-l $link || -e $link);
    symlink ($target, $link) or warn ("can't link $link to $target, $!");
}

# escape the characters that are special in HTML text and attributes
sub html_escape
{
    my ($text) = @_;
    $text = "" if (! defined $text);
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# byte count => rounded "123K"; values of 10000 or more are rounded again
# into "M", and likewise into "G"
sub amount
{
    my ($num) = @_;
    my $tag = "K";

    $num = int (($num + 512) / 1024);
    if ($num >= 10000) {
        $num = int (($num + 512) / 1024);
        $tag = "M";
        if ($num >= 10000) {
            $num = int (($num + 512) / 1024);
            $tag = "G";
        }
    }
    return "$num$tag";
}

# 1234567 => "1,234,567"
sub commas
{
    my ($text) = @_;
    1 while $text =~ s/(.*\d)(\d\d\d)/$1,$2/;
    return $text;
}

# true if a referral url points back at this site (see -z) or at a local file
sub is_local_url
{
    my ($url) = @_;

    return 1 if ($opt_z &&
                 $url =~ /^http:\/\/(www([0-9]?)\.)?\Q$opt_z\E\.(area\.)?com/i);

    if ($opt_z && $opt_z eq "pornopolis") {
        return 1 if ($url =~ /206\.204\.77\.23/);    # local IP address
        return 1 if ($url =~ /senorita\.com/i);
    }

    return 1 if ($url =~ /^file:/i);
    return 0;
}

# $sum as a whole-number percentage of $total, rounded to nearest.
# returns the string "< 1" when that rounds to zero, and 0 when there
# is no total to divide by.
sub percent
{
    my ($sum, $total) = @_;
    return 0 if (! $total);
    my $pct = int ((($sum / $total) + .005) * 100);
    $pct = "< 1" if (! $pct);
    return $pct;
}

####################
# report functions #
####################
#
# every report takes the output filehandle as its only argument

# search engine hosts seen in referrals, most frequent first
sub referral
{
    my $fh = shift;
    my $engine_count = 0;

    print $fh "engine referrals:\n\n";

    foreach my $eng (sort { $ENGINES{$b} <=> $ENGINES{$a} } keys %ENGINES) {
        if (! $engine_count) {
            printf $fh "%7s %s\n", "count", "referral";
            printf $fh "%7s %s\n", "-----", "--------";
        }
        $engine_count++;
        printf $fh "%7s %s\n", $ENGINES{$eng}, "$eng";
    }

    print $fh " - none -\n" if (!$engine_count);
}

# browser counts and their share of all counted browsers
sub agent
{
    my $fh = shift;
    my $total_browsers = 0;

    foreach my $name (keys %BROWSERS) {
        $total_browsers += $BROWSERS{$name};
    }

    print $fh "browser wars:\n\n";

    printf $fh "%7s %7s %s\n", "count", "percent", "browser";
    printf $fh "%7s %7s %s\n", "-----", "-------", "-------";

    foreach my $name (sort { $BROWSERS{$b} <=> $BROWSERS{$a} } keys %BROWSERS) {
        printf $fh "%7s %7s%% %s\n",
            commas ($BROWSERS{$name}),
            percent ($BROWSERS{$name}, $total_browsers),
            $name;
    }
}

# platform counts, most frequent first
sub platforms
{
    my $fh = shift;

    print $fh "platforms visiting:\n\n";

    printf $fh "%7s %s\n", "count", "platform";
    printf $fh "%7s %s\n", "-----", "--------";

    foreach my $name (sort { $PLATFORMS{$b} <=> $PLATFORMS{$a} } keys %PLATFORMS) {
        printf $fh "%7s %s\n", commas ($PLATFORMS{$name}), $name;
    }
}

# agents that fetched /robots.txt, with the date each was last seen
sub robots
{
    my $fh = shift;

    print $fh "robots visiting:\n\n";

    if (! %ROBOTS) {
        print $fh " - none -\n";
        return;
    }

    printf $fh "%7s %9s %s\n", "count", "last", "robot";
    printf $fh "%7s %9s %s\n", "-----", "----", "-----";

    foreach my $name (sort { $ROBOTS{$b} <=> $ROBOTS{$a} } keys %ROBOTS) {
        printf $fh "%7s %9s %s\n",
            commas ($ROBOTS{$name}), $ROBOTS_LAST{$name}, $name;
    }
}

# hits, bytes and unique hosts per day, in the order days appear in the log
sub daily
{
    my $fh = shift;

    print $fh "hits sorted by date:\n\n";

    printf $fh "%12s %12s %12s %12s %s\n",
        "date", "hits", "bytes", "transferred", "unique sites";
    printf $fh "%12s %12s %12s %12s %s\n",
        "----", "----", "-----", "-----------", "------------";

    foreach my $date (@date_order) {
        my $num_sites = scalar (keys %{$DAILY_HOSTS{$date}});

        printf $fh "%12s %12s %12s %12s %s\n",
            $date,
            commas ($DAILY_HITS{$date}),
            commas ($DAILY_BYTES{$date}),
            amount ($DAILY_BYTES{$date}),
            commas ($num_sites);
    }
}

# top -h top-level domains, with the country name from the -c file
sub domains
{
    my $fh = shift;
    my $count = 0;

    print $fh "top level domains visiting:\n\n";

    printf $fh "%7s %7s %s\n", "hits", "domain", "country";
    printf $fh "%7s %7s %s\n", "----", "------", "-------";

    foreach my $tld (sort { $DOMAINS{$b} <=> $DOMAINS{$a} } keys %DOMAINS) {
        printf $fh "%7s %7s %s\n", commas ($DOMAINS{$tld}), "$tld", $COUNTRY{$tld};
        last if ($opt_h ne "all" && $opt_h == ++$count);
    }
}

# hits per hour of day and their share of all hits
sub hourly
{
    my $fh = shift;

    print $fh "hits by time of day:\n\n";

    printf $fh "%7s %6s %s\n", "hour", "count", "percent";
    printf $fh "%7s %6s %s\n", "----", "-----", "-------";

    foreach my $hour (sort keys %HOUR_OF_DAY) {
        # %3s, not %3d: percent() may return the string "< 1"
        printf $fh " %02d:00 %6s %3s%%\n",
            $hour,
            commas ($HOUR_OF_DAY{$hour}),
            percent ($HOUR_OF_DAY{$hour}, $hits);
    }
}

# top -d documents requested ("all" lists every document)
sub documents
{
    my $fh = shift;
    my $count = 0;

    if ($opt_d ne "all") {
        print $fh "top $opt_d documents accessed:\n\n";
    }
    else {
        print $fh "documents accessed:\n\n";
    }

    printf $fh "%7s %s\n", "hits", "document";
    printf $fh "%7s %s\n", "----", "--------";

    foreach my $doc (sort { $DOCUMENTS{$b} <=> $DOCUMENTS{$a} } keys %DOCUMENTS) {
        printf $fh "%7s %s\n", commas ($DOCUMENTS{$doc}), $doc;
        last if ($opt_d ne "all" && $opt_d == ++$count);
    }
}

# top -s remote hosts ("all" lists every host)
sub sites
{
    my $fh = shift;
    my $count = 0;

    if ($opt_s ne "all") {
        print $fh "top $opt_s sites:\n\n";
    }
    else {
        print $fh "unique sites visiting:\n\n";
    }

    printf $fh "%7s %s\n", "hits", "remote host";
    printf $fh "%7s %s\n", "----", "-----------";

    foreach my $site (sort { $HOSTS{$b} <=> $HOSTS{$a} } keys %HOSTS) {
        printf $fh "%7s %s\n", commas ($HOSTS{$site}), $site;
        last if ($opt_s ne "all" && $opt_s == ++$count);
    }
}

# status codes seen, most frequent first, with a short explanation
sub status
{
    my $fh = shift;

    print $fh "status codes:\n\n";

    printf $fh "%7s %4s %s\n", "count", "code", "message";
    printf $fh "%7s %4s %s\n", "-----", "----", "-------";

    foreach my $code (sort { $STATUS{$b} <=> $STATUS{$a} } keys %STATUS) {
        printf $fh "%7s %4s %s\n",
            commas ($STATUS{$code}), $code, $STATUS_CODES{$code};
    }
}

# totals for the whole logfile; also updates the -b running totals
# and appends a line to the -t timing file
sub totals
{
    my $fh = shift;
    my $howlong   = time - $start;
    my $size      = -s $access;
    my $num_sites = scalar (keys %HOSTS);

    if ($opt_b) {
        $RUNNING{'logsize'}   += $size;
        $RUNNING{'hits'}      += $hits;
        $RUNNING{'hits-main'} += $hits_to_home_page;
        $RUNNING{'bytes'}     += $total_bytes;
        $RUNNING{'sites'}     += $num_sites;
    }

    print $fh "totals for " . ($opt_n ? $opt_n : $access) . ":\n\n";
    print $fh " log start: $start_date\n";
    print $fh " log finish: $end_date\n";
    print $fh " hits: " . commas ($hits) . "\n";
    print $fh " home page hits: " . commas ($hits_to_home_page) . "\n";
    print $fh "bytes transferred: " . commas ($total_bytes)
        . " (" . amount ($total_bytes) . ")\n";
    print $fh " unique sites: " . commas ($num_sites) . "\n";
    print $fh " sites ignored: " . commas ($ignored)
        . " site" . ($ignored == 1 ? "" : "s") . "\n";
    print $fh " bad input: " . commas ($fail)
        . " line" . ($fail == 1 ? "" : "s") . "\n";
    print $fh " time to execute: " . commas ($howlong) . " seconds "
        . "[$start_time - " . logdate () . "]\n";
    print $fh " logfile: $access (" . amount ($size) . ")\n";
    print $fh "\n";

    if ($opt_b) {
        print $fh "running totals:\n\n";
        print $fh " hits: " . commas ($RUNNING{'hits'}) . "\n";
        print $fh " home page hits: " . commas ($RUNNING{'hits-main'}) . "\n";
        print $fh "bytes transferred: " . commas ($RUNNING{'bytes'}) . "\n";
        print $fh " unique sites: " . commas ($RUNNING{'sites'}) . "\n";
        print $fh " log totals: " . amount ($RUNNING{'logsize'}) . " processed\n";
    }

    # timing file
    if ($opt_t) {
        $opt_n = $access if (! $opt_n);
        open (TIMING, '>>', $opt_t) or die ("can't append to $opt_t, $!");
        # substr picks the "HH:MM" part out of logdate's "MM/DD/YY HH:MM"
        printf TIMING "%-27s [%s - %s] %s %s seconds\n",
            $opt_n,
            $start_time,
            substr (logdate (), 9, 6),
            amount ($size),
            commas ($howlong);
        close TIMING;
    }
}

# for each authorized user, the hosts they came from with hits and bytes
sub user_sites
{
    my $fh = shift;

    print $fh "users and the sites they visit from:\n\n";

    printf $fh "%-12s %-36s %-8s %s\n", "user", "sites", "hits", "transferred";
    printf $fh "%-12s %-36s %-8s %s\n", "----", "-----", "----", "-----------";

    foreach my $user (sort keys %USER_BYTES) {
        printf $fh "%-12s ", $user;
        my $count = 0;
        foreach my $site (sort keys %{$USER_SITES_HITS{$user}}) {
            # second and later hosts are indented under the user name
            if ($count++) {
                print $fh " " x 14;
            }
            printf $fh "%-36s %-8s %s\n",
                $site,
                commas ($USER_SITES_HITS{$user}{$site}),
                amount ($USER_SITES_BYTES{$user}{$site});
        }
    }
}

# hits and bytes per authorized user, biggest transfer first
sub user_report
{
    my $fh = shift;
    my $num = 0;

    print $fh "\ntransfers per authenticated user:\n\n";

    printf $fh "%3s %-10.10s %-11.11s %10s %14s (%s)\n",
        "#", "user", "last", "hits", "data", "mb";
    printf $fh "%3s %-10.10s %-11.11s %10s %14s ----\n",
        "-", "----", "----", "----", "----";

    foreach my $user (sort { $USER_BYTES{$b} <=> $USER_BYTES{$a} } keys %USER_BYTES) {
        $num++;
        printf $fh "%3s %-10.10s %-11.11s %10s %14s (%s)\n",
            $num,
            $user,
            $USER_LAST{$user},
            commas ($USER_HITS{$user}),
            commas ($USER_BYTES{$user}),
            amount ($USER_BYTES{$user});
    }
}

# site sections (-z special cases) by share of section hits
sub popularity
{
    my $fh = shift;
    my $total_section_hits = 0;

    print $fh "areas by popularity:\n\n";

    printf $fh "%10s %10s %s\n", "hits", "percent", "area";
    printf $fh "%10s %10s %s\n", "----", "-------", "----";

    foreach my $sect (keys %SECTIONS) {
        $total_section_hits += $SECTIONS{$sect};
    }

    foreach my $sect (sort { $SECTIONS{$b} <=> $SECTIONS{$a} } keys %SECTIONS) {
        printf $fh "%10s %10s%% %s\n",
            commas ($SECTIONS{$sect}),
            percent ($SECTIONS{$sect}, $total_section_hits),
            $sect;
    }
}

# the -v report.  it returns immediately, so nothing below the return
# runs; the body is kept as it was.
sub vampyre
{
    my $fh = shift;

    return;

    my $total_cs_hits = 0;

    print $fh "dean stark visits:\n\n";

    foreach my $site (keys %CS_SITES) {
        $total_cs_hits += $CS_SITES{$site};
    }

    if (! $total_cs_hits) {
        print $fh " - none -\n";
        return;
    }

    printf $fh "%10s %10s %10s %s\n", "hits", "percent", "last", "site";
    printf $fh "%10s %10s %10s %s\n", "----", "-------", "----", "----";

    foreach my $site (sort { $CS_SITES{$b} <=> $CS_SITES{$a} } keys %CS_SITES) {
        printf $fh "%10s %10s%% %10s %s\n",
            commas ($CS_SITES{$site}),
            percent ($CS_SITES{$site}, $total_cs_hits),
            $CS_SITES_LAST{$site},
            $site;
    }
}

# hosts seen on each of the last -u days ("all" for every day), newest first
sub daily_visitors
{
    my $fh = shift;
    my $count = 0;

    if ($opt_u eq "all") {
        print $fh "all sites visiting each day:\n\n";
    }
    else {
        print $fh "visitors for the last $opt_u day"
            . ($opt_u == 1 ? "" : "s") . ":\n\n";
    }

    foreach my $when (reverse @DAILY_ORDER) {
        ++$count;
        last if ($opt_u ne "all" && $count > $opt_u);

        print $fh "unique sites visiting on $when:\n\n";

        printf $fh "%10s %s\n", "hits", "site";
        printf $fh "%10s %s\n", "----", "----";

        foreach my $site (sort keys %{$DAILY_VISITORS{$when}}) {
            printf $fh "%10s %s\n", $DAILY_VISITORS{$when}{$site}, $site;
        }

        print $fh "\n";
    }
}