# (PC Press Internet CD, 03/1996)
# ARTICLES - Reads wwwstat output for BYTE archive, reports frequency
# of use of articles by issue (e.g. Feb 1995) or section (e.g. News and Views)
# Expects one or more files matching 9?????.htm containing wwwstat output.
# Expects a list file (its name is missing from this copy) containing the mapping between URLs and article titles as url~issue~section~title records.
# Jon Udell, BYTE,,
# partial results for Jan 15, 1996:
# global view
# 000910 January 1996 Cover Story The World's Fastest Computers
# 000136 September 1995 20th Anniversary Top 20 Small Systems
# 000111 September 1995 20th Anniversary 20 Most Important Chips
# 960115: issue view
# 000910 January 1996 Cover Story The World's Fastest Computers
# 000051 January 1996 The Byte Network Pro Server Management
# 000038 January 1996 Pournelle Dead Chickens and Portable Data
# 000033 January 1996 Editorial Web Sites: Don't Blink
# 000019 January 1996 International News & Client/Server: Europe Trails U.S.
# 000016 January 1996 International Featur A Consultant in Your PC
# 000013 January 1996 International What's Video and TV Board for PCI Systems
# 000011 January 1996 International What's HotMetal Pro Creates HTML Documents
# 960115: section view
# 000051 January 1996 The Byte Network Pro Server Management
# 000033 December 1995 The Byte Network Pro Perl Magic
# 000014 September 1995 The Byte Network Pro Web Search
# 000014 August 1995 The Byte Network Pro Live Wire
# Command-line handling and file setup.
#   $ARGV[0]  report name; the first ".htm" in it is removed and the result
#             is the base name of the two output files ("all" is special-cased
#             further down)
#   $ARGV[1]  global threshold ($globmin)
#   $ARGV[2]  per-view threshold ($viewmin)
($arg = $ARGV[0]) =~ s/\.htm//;
$globmin = $ARGV[1]; # threshold: e.g. 1000 for entire history of site, 100 for 1 day
$viewmin = $ARGV[2]; # threshold: e.g. 100 for entire history of site, 10 for 1 day
# NOTE(review): the file name in this open() is empty in this copy, so the
# URL/title list cannot be opened; presumably the name was lost together with
# the one missing from the header comment. TODO: restore the real file name.
open(LST, "") || die "Cannot open";
# Two output files derived from the report name; SUM receives the report
# lines printed below, LOG receives the hit totals.
open(LOG, ">$arg.at1") || die "Cannot create $arg.at1";
open(SUM, ">$arg.at2") || die "Cannot create $arg.at2";
# Choose the glob pattern for the wwwstat files to read: "all" selects every
# report matching 9?????.htm, anything else selects the single file <arg>.htm.
# The two assignments are alternatives. Without the "else" the second one is
# a bare block that always runs, which turns the "all" pattern into
# "9?????.htm.htm" so that no report file is ever matched.
if ($arg eq "all") {
    $arg = "9?????.htm";
}
else {
    $arg = "$arg.htm";
}
$loghits = 0;    # running total of requests counted while reading the reports
# build a url array, and two related associative arrays keyed on url
# 1. titles
# 2. counts
# Each record read from LST is url~issue~section~title.
# NOTE(review): in this copy the read loop and the push onto @urls were
# missing, so only one split of an empty $_ was executed and @urls (iterated
# further down) stayed empty. The loop below is a reconstruction; confirm it
# against a clean copy of the script.
while (<LST>)
{
    s/\r?\n$//;          # keep the line ending out of the title field
    next unless /~/;     # skip blank or malformed lines
    ($url,$issue,$section,$title) = split(/~/);
    push(@urls,$url);
    $titles{$url} = $issue . "~" . $section . "~" . $title;
    $counts{$url} = "000000";
}
# process wwwstat output
# For every report file matching the pattern in $arg, read it line by line and
# add the request count of each qualifying article URL to %counts.
# NOTE(review): this copy had lost its braces, the closing parenthesis of the
# inner test and the file handle in "while ()". The nesting below is
# reconstructed from the statement order; confirm against a clean copy, in
# particular that $loghits is meant to be counted inside the inner test.
foreach $f (<${arg}>)    # <${arg}> is a file-name glob, not a read from a handle
{
    print $f . ' ';      # progress indicator
    open(F,"$f") || die "Cannot open $f";
    while (<F>)          # with an empty condition the loop would never read F
    {
        # limit to URLs that are really articles, no table of contents pages or gifs
        if (/\/art\/[0-9]+\// ||/\/art\/special\//|| /\/art\/bonus\// || /\/bmark\//)
        {
            s/\r?\n$//;  # keep the line ending out of $url so it can match a %counts key
            s/^ +//;
            @line = split(/ \| /,$_,2);    # "<stats> | <url>"
            $stats = $line[0];
            $url = $line[1];
            ($preqs,$pbytes,$bytes,$reqs) = split(/ +/,$stats,4);
            # Under /bmark/ only bytecpu.exe is counted; elsewhere gifs,
            # all-numeric .htm pages and secN.htm pages are skipped.
            if ( (/\/bmark\// && /bytecpu\.exe/) ||
                 ( (! /\/bmark\//) && (! /gif/) && (! /\/[0-9]+\.htm/ ) && (! /\/sec[0-9]+\.htm/) )
               )
            {
                # counts are kept zero-padded to six digits so that a plain
                # string sort later orders them numerically
                $counts{$url} = sprintf("%06d",$counts{$url}+$reqs);
                $loghits += $reqs;
            }
        }
    }
    close F;
}
# attach counts to titles: one "issue~section~title~~count" record per url
foreach $url (@urls)
{
    push (@subtots,$titles{$url} . "~~" . $counts{$url} );
}
# sorting puts records that share the same issue~section~title next to each other
@subtots = sort @subtots;

# condense duplicate titles and add up subtotals
# Each finished group becomes one "count~issue~section~title" entry in @totals.
$prevtitl = "";
$totcount = 0;
foreach $subtot (@subtots)
{
    ($title,$count) = split(/~~/,$subtot);
    if ($title ne $prevtitl)
    {
        # a new title starts: emit the finished group (there is none before
        # the first record), then start a fresh sum
        push(@totals,sprintf("%06d",$totcount) . "~" . $prevtitl ) if $prevtitl ne "";
        $totcount = $count;
        $prevtitl = $title;
    }
    else
    {
        $totcount += $count;
    }
}
# emit the last group; without this the final title in sort order is dropped
push(@totals,sprintf("%06d",$totcount) . "~" . $prevtitl ) if $prevtitl ne "";
# Walk the condensed totals from the highest to the lowest hit count. The
# counts are zero-padded to six digits, so a reverse string sort orders them
# numerically. Each record is hits~issue~section~title.
foreach $total (reverse sort @totals) # break out totals by issue and section
($hits,$issue,$section,$title) = split(/~/,$total);
# The tests below match against the whole record, not only the section or
# issue field, so a pattern can also hit on words in the title.
$_ = $total;
# NOTE(review): every "if" from here to the "other views" test has lost its
# body in this copy. Presumably each one collected $total into a per-section
# or per-issue list that was later handed to View; TODO recover from a clean
# copy of the script.
#section views
if ( /Editorial/)
if ( /Letters/)
if (/News & Views/)
if (/Blasts/)
if ( /Book & CD/)
if ( /BYTE Awards/)
if ( /Solutions Focus/)
if (/Features/)
if (/Byte Network Project/)
if (/State Of The Art/)
if (/Special Report/)
if (/20th Anniversary/)
if ( /Cover Story/)
# records mentioning "Book" are excluded here; presumably to keep the
# "Book & CD" section out of the Reviews view (verify)
if ( ( /Reviews/) && (! /Book/) )
if ( /BYTE Lab/)
if ( /Core Technologies/)
if ( /Pournelle/)
if ( /What\'s New/)
if ( /Commentary/)
#issue views
if (/January 1996/)
if (/December 1995/)
if (/November 1995/)
if (/October 1995/)
if (/September 1995/)
if (/August 1995/)
if (/July 1995/)
if (/June 1995/)
if (/May 1995/)
if (/April 1995/)
if (/March 1995/)
if (/February 1995/)
if (/January 1995/)
if (/December 1994/)
if (/November 1994/)
if (/October 1994/)
if (/September 1994/)
if (/August 1994/)
if (/July 1994/)
if (/June 1994/)
if (/May 1994/)
if (/April 1994/)
if (/March 1994/)
if (/February 1994/)
if (/January 1994/)
# other views
if ( /BYTEmarks/)
# running total of hits attributed to titled articles
$arthits += $hits;
if ($hits > $globmin) # print summary part of report for items above global threshold
# NOTE(review): the sprintf argument list is missing from this copy. The
# format has four left-justified fields (8, 15, 20 and 38 characters wide),
# presumably hits, issue, section and title as in the sample report in the
# file header; confirm.
$s = sprintf("%-8.8s%-15.15s%-20.20s %-38.38s\n",
print SUM $s;
# Write both hit totals to the summary file and to the log file.
print SUM "Loghits total: $loghits\n";
print SUM "Arthits total: $arthits\n";
print LOG "Loghits total: $loghits\n";
print LOG "Arthits total: $arthits\n";
# do issue and section parts of report
# View(type, records...) - write one view of the report to SUM.
#   type     label printed in the "<type> view" heading line
#   records  hits~issue~section~title strings, listed highest count first
# Only records whose hit count exceeds $viewmin are printed.
# NOTE(review): the braces of this sub and the sprintf argument list are
# missing from this copy, and no call to View is visible.
sub View
local ($type,@view) = @_;
print SUM "$type view\n";
# resets the global accumulator; the sum built below is not read again in
# the visible part of this sub
$arthits = 0;
# counts are zero-padded, so a reverse string sort lists the largest first
foreach $total (reverse sort @view)
($hits,$issue,$section,$title) = split(/~/,$total);
$arthits += $hits;
# NOTE(review): argument list lost; the four fields are presumably hits,
# issue, section and title, matching the sample report in the file header.
# Confirm.
$s = sprintf("%-8.8s%-15.15s%-20.20s %-38.38s\n",
if ($hits > $viewmin)
{print SUM $s;}