# Source: http://www.byte.com/art/download/articles.pl (PC Press Internet CD, 03/1996)
# ARTICLES - Reads wwwstat output for BYTE archive, reports frequency
# of use of articles by issue (e.g. Feb 1995) or section (e.g. News and Views)
#
# Expects one or more files matching 9?????.htm containing wwwstat output.
# Expects all.at containing mapping between URLs and article titles.
#
# Jon Udell, BYTE, jon_u@dev5.byte.com,judelL@bix.com
# partial results for Jan 15, 1996:
# global view
#
# 000910 January 1996 Cover Story The World's Fastest Computers
# 000136 September 1995 20th Anniversary Top 20 Small Systems
# 000111 September 1995 20th Anniversary 20 Most Important Chips
#
#
# 960115: issue view
#
# 000910 January 1996 Cover Story The World's Fastest Computers
# 000051 January 1996 The Byte Network Pro Server Management
# 000038 January 1996 Pournelle Dead Chickens and Portable Data
# 000033 January 1996 Editorial Web Sites: Don't Blink
# 000019 January 1996 International News & Client/Server: Europe Trails U.S.
# 000016 January 1996 International Featur A Consultant in Your PC
# 000013 January 1996 International What's Video and TV Board for PCI Systems
# 000011 January 1996 International What's HotMetal Pro Creates HTML Documents
#
# 960115: section view
#
# 000051 January 1996 The Byte Network Pro Server Management
# 000033 December 1995 The Byte Network Pro Perl Magic
# 000014 September 1995 The Byte Network Pro Web Search
# 000014 August 1995 The Byte Network Pro Live Wire
# ---------------------------------------------------------------------
# Command line and file setup.
#
#   $ARGV[0]  base name of one wwwstat output file (a trailing ".htm" is
#             stripped), or "all" to process every file matching 9?????.htm
#   $ARGV[1]  global threshold ($globmin)
#   $ARGV[2]  per-view threshold ($viewmin)
#
# Opens all.at for reading on LST and creates <name>.at1 (LOG) and
# <name>.at2 (SUM).  On exit from this section $arg holds the glob pattern
# of the wwwstat files to read.
# ---------------------------------------------------------------------
die "usage: $0 <name|all>[.htm] [globmin] [viewmin]\n"
    unless defined($ARGV[0]) && length($ARGV[0]);

# Anchor the pattern so only a trailing ".htm" extension is removed.
($arg = $ARGV[0]) =~ s/\.htm$//;

# A missing threshold compares as 0 anyway; make that explicit.
$globmin = defined($ARGV[1]) ? $ARGV[1] : 0; # threshold: e.g. 1000 for entire history of site, 100 for 1 day
$viewmin = defined($ARGV[2]) ? $ARGV[2] : 0; # threshold: e.g. 100 for entire history of site, 10 for 1 day

# Three-argument open keeps the command-line argument from being read as
# part of the open mode; $! reports why an open failed.
open(LST, '<', "all.at")   || die "Cannot open all.at: $!";
open(LOG, '>', "$arg.at1") || die "Cannot create $arg.at1: $!";
open(SUM, '>', "$arg.at2") || die "Cannot create $arg.at2: $!";

# From here on $arg is the glob pattern naming the wwwstat files to read.
if ($arg eq "all") {
    $arg = "9?????.htm";
}
else {
    $arg = "$arg.htm";
}

$loghits = 0;
# build a url array, and two related associative arrays keyed on url
# 1. titles
# 2. counts
#
# Each line of all.at (open on LST) is "url~issue~section~title".
# @urls keeps the URLs in file order, %titles maps a URL to its
# "issue~section~title" string, and %counts starts every URL at "000000".
while (<LST>) {
    chomp;                      # strip only a newline; chop would eat a real
                                # character on an unterminated last line
    next if /^\s*$/;            # ignore blank lines

    ($url,$issue,$section,$title) = split(/~/);
    push(@urls,$url);
    $titles{$url} = $issue . "~" . $section . "~" . $title;
    $counts{$url} = "000000";
}
close(LST);
# process wwwstat output
#
# Reads every file matching the glob pattern in $arg.  Each line is
# lowercased; lines of interest look like "<stats> | <url>".  The request
# count of every qualifying line is added to $counts{<url>} (kept as a
# zero-padded 6-digit string) and to the running total $loghits.
# NOTE(review): because the line is lowercased, URLs in all.at presumably
# have to be lowercase to pick up their counts - confirm against all.at.
foreach $f (<${arg}>) {
    print $f . ' ';             # progress: name each file as it is read
    open(F, '<', $f) || die "Cannot open $f: $!";

    while (<F>) {
        tr/A-Z/a-z/;

        # limit to URLs that are really articles, no table of contents pages or gifs
        next unless ( /\/art\/[0-9]+\// || /\/art\/special\// ||
                      /\/art\/bonus\//  || /\/bmark\// );

        s/^ +//;
        ($stats,$url) = split(/ \| /,$_,2);
        next unless defined($url);  # no " | " separator: not a stats line
        chomp($url);

        # Fourth whitespace-separated stats field is the one that is summed
        # (presumably the request count - verify against wwwstat output).
        ($preqs,$pbytes,$bytes,$reqs) = split(/ +/,$stats,4);

        # Under /bmark/ only bytecpu.exe counts; elsewhere skip gifs,
        # numbered .htm pages and secNN.htm pages.
        if ( (/\/bmark\// && /bytecpu\.exe/) ||
             ( (! /\/bmark\//) && (! /gif/) && (! /\/[0-9]+\.htm/ ) && (! /\/sec[0-9]+\.htm/) )
           ) {
            $counts{$url} = sprintf("%06d",$counts{$url}+$reqs);
            $loghits += $reqs;
        }
    }
    close F;
}
# attach counts to titles: one "issue~section~title~~count" record per
# entry of @urls, appended to @subtots in @urls order
push(@subtots, map { join("~~", $titles{$_}, $counts{$_}) } @urls);
# condense duplicate titles and add up subtotals
#
# Sorting puts records with the same "issue~section~title" next to each
# other; CondenseSubtotals then folds each run into one "count~title"
# record, which is appended to @totals.
@subtots = sort @subtots;
push(@totals, &CondenseSubtotals(@subtots));

# CondenseSubtotals(SORTED_RECORDS)
#
# Takes "title~~count" records already sorted so equal titles are adjacent
# and returns one "NNNNNN~title" record per distinct title, where NNNNNN is
# the zero-padded sum of that title's counts.  Every group is emitted,
# including the last one, and nothing is emitted for empty input.
sub CondenseSubtotals
{
    my (@sorted) = @_;
    my (@condensed, $prevtitl, $totcount);

    foreach my $subtot (@sorted) {
        # Split on the LAST "~~": an empty section or title puts extra
        # "~~" runs inside the title part, so a plain split would misparse.
        my ($title, $count) = $subtot =~ /^(.*)~~(\d+)$/;
        next unless defined($title);

        if (defined($prevtitl) && $title eq $prevtitl) {
            $totcount += $count;
            next;
        }

        # A new title starts: flush the finished group, if there is one.
        push(@condensed, sprintf("%06d~%s", $totcount, $prevtitl))
            if defined($prevtitl);
        ($prevtitl, $totcount) = ($title, $count);
    }

    # Flush the final group, which the loop itself never reaches.
    push(@condensed, sprintf("%06d~%s", $totcount, $prevtitl))
        if defined($prevtitl);

    return @condensed;
}
# break out totals by issue and section
#
# Each rule pairs a pattern with the array that collects matching
# "count~issue~section~title" records.  A record may land in several arrays.
# NOTE(review): every pattern is tested against the whole record, so a
# section or issue name appearing in an article title also matches -
# confirm that this is intended.
my @view_rules = (
    # section views
    [ qr/Editorial/,            \@editorial ],
    [ qr/Letters/,              \@letters   ],
    [ qr/News & Views/,         \@news      ],
    [ qr/Blasts/,               \@blasts    ],
    [ qr/Book & CD/,            \@books     ],
    [ qr/BYTE Awards/,          \@awards    ],
    [ qr/Solutions Focus/,      \@solutions ],
    [ qr/Features/,             \@features  ],
    [ qr/Byte Network Project/, \@netproj   ],
    [ qr/State Of The Art/,     \@sota      ],
    [ qr/Special Report/,       \@special   ],
    [ qr/20th Anniversary/,     \@anniv     ],
    [ qr/Cover Story/,          \@cover     ],
    [ qr/BYTE Lab/,             \@lab       ],
    [ qr/Core Technologies/,    \@core      ],
    [ qr/Pournelle/,            \@pournelle ],
    [ qr/What\'s New/,          \@what      ],
    [ qr/Commentary/,           \@comment   ],
    # issue views
    [ qr/January 1996/,         \@9601 ],
    [ qr/December 1995/,        \@9512 ],
    [ qr/November 1995/,        \@9511 ],
    [ qr/October 1995/,         \@9510 ],
    [ qr/September 1995/,       \@9509 ],
    [ qr/August 1995/,          \@9508 ],
    [ qr/July 1995/,            \@9507 ],
    [ qr/June 1995/,            \@9506 ],
    [ qr/May 1995/,             \@9505 ],
    [ qr/April 1995/,           \@9504 ],
    [ qr/March 1995/,           \@9503 ],
    [ qr/February 1995/,        \@9502 ],
    [ qr/January 1995/,         \@9501 ],
    [ qr/December 1994/,        \@9412 ],
    [ qr/November 1994/,        \@9411 ],
    [ qr/October 1994/,         \@9410 ],
    [ qr/September 1994/,       \@9409 ],
    [ qr/August 1994/,          \@9408 ],
    [ qr/July 1994/,            \@9407 ],
    [ qr/June 1994/,            \@9406 ],
    [ qr/May 1994/,             \@9405 ],
    [ qr/April 1994/,           \@9404 ],
    [ qr/March 1994/,           \@9403 ],
    [ qr/February 1994/,        \@9402 ],
    [ qr/January 1994/,         \@9401 ],
    # other views
    [ qr/BYTEmarks/,            \@bytemarks ],
);

# Walk the totals in descending string order (counts are zero-padded).
foreach $rec (reverse sort @totals) {
    ($hits,$issue,$section,$title) = split(/~/,$rec);

    foreach my $rule (@view_rules) {
        my ($pattern, $bucket) = @$rule;
        push(@$bucket, $rec) if $rec =~ $pattern;
    }

    # Reviews is the one view with an exclusion: skip anything mentioning "Book".
    push(@reviews, $rec) if $rec =~ /Reviews/ && $rec !~ /Book/;

    $arthits += $hits;

    # print summary part of report for items above global threshold
    if ($hits > $globmin) {
        printf SUM "%-8.8s%-15.15s%-20.20s %-38.38s\n",
            $hits, $issue, $section, $title;
    }
}
# Both output files get the same two grand-total lines.
$totals_text = "Loghits total: $loghits\nArthits total: $arthits\n";
print SUM $totals_text;
print LOG $totals_text;

# do issue and section parts of report
&View("bytemarks", @bytemarks);

# issue views, newest issue first
foreach $view_list (
    \@9601,
    \@9512, \@9511, \@9510, \@9509, \@9508, \@9507,
    \@9506, \@9505, \@9504, \@9503, \@9502, \@9501,
    \@9412, \@9411, \@9410, \@9409, \@9408, \@9407,
    \@9406, \@9405, \@9404, \@9403, \@9402, \@9401
) {
    &View("issue", @$view_list);
}

# section views, in fixed report order
foreach $view_list (
    \@editorial, \@letters,  \@news,      \@blasts,  \@books,
    \@awards,    \@solutions, \@features, \@netproj, \@sota,
    \@special,   \@anniv,    \@cover,     \@reviews, \@lab,
    \@core,      \@pournelle, \@what,     \@comment
) {
    &View("section", @$view_list);
}
# View(TYPE, RECORDS...)
#
# Writes one view of the report to the SUM filehandle: a "<TYPE> view"
# heading, then one formatted line per record whose hit count exceeds the
# global $viewmin, in descending string order of the records.
#
#   TYPE     label printed in the heading (e.g. "issue", "section")
#   RECORDS  strings of the form "count~issue~section~title"
#
# All working variables are lexical, so the script-wide $arthits, $hits,
# $issue, $section and $title are left untouched.
sub View
{
    my ($type, @view) = @_;

    print SUM "$type view\n";

    foreach my $total (reverse sort @view) {
        my ($hits, $issue, $section, $title) = split(/~/, $total);
        next unless $hits > $viewmin;

        printf SUM "%-8.8s%-15.15s%-20.20s %-38.38s\n",
            $hits, $issue, $section, $title;
    }
}