#!/usr/bin/perl
# showstats

# if reptype is '' or 'dtr' - dir tree report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server, detail, honly, sortkey
#
# if reptype is 'dom' - domain report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server, detail
#
# if reptype is 'red' - redirect report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server

$ICON_REG='http://www.columbia.edu/httpd/getstats/getstats.gif';
$ICON_SEC='https://www1.columbia.edu/icons/getstats.gif';

$SRVPORT=$ENV{'SERVER_PORT'};		# get server's port number
if ($SRVPORT eq '443') {		# secure server
  $ICON_OUT=$ICON_SEC;
  $basedir='/etc/httpd/data/sec/acis/rad/web-usage';
  $baseurl='https://www1.columbia.edu/sec/acis/rad/web-usage';
} else {				# regular server
  $ICON_OUT=$ICON_REG;
  $basedir='/etc/httpd/data/httpd/reports';
  $baseurl='http://www.columbia.edu/httpd/reports';
}

					# set defaults
$reptype='dtr';
$server='www';				# default server
$repdate='';
$webdir='';
$detail=1;
$honly='';
$sortkey='req';				# alp or req
$debug='';

if ($ENV{'REQUEST_METHOD'} eq 'GET') {	# get the input using method=GET
  $buffer = $ENV{'QUERY_STRING'};
} else {				# get the input using method=POST
  read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}
@pairs = split(/&/, $buffer);		# split the name-value pairs

foreach $pair (@pairs) {		# put values into scalar variables
  ($name, $value) = split(/=/, $pair);

  # Un-Webify plus signs, undo the %-encoding
  $value =~ tr/+/ /;
  $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

  if ($name eq 'reptype') {		# report type: dtr, dom, red
    $reptype = $value;
  } elsif ($name eq 'server') {		# web server short name
    $server = $value;
  } elsif ($name eq 'repdate') {	# 8 digit report date
    $repdate = $value;
  } elsif ($name eq 'webdir') {		# desired directory
    $webdir = $value;
  } elsif ($name eq 'detail') {		# levels of detail
    $detail = $value;
  } elsif ($name eq 'honly') {		# count HTML files only
    $honly = $value;
  } elsif ($name eq 'sortkey') {	# alp or req
    $sortkey = $value;
  } elsif ($name eq 'debug') {
    $debug = $value;
  } else {
    &errmsg2("not a supported option: $name=$value");
  }
}

#$uni=$ENV{'REMOTE_USER'};		# Kerberos handle
#$emain=$ENV{'USER_EMAIL'};		# email address
#$person="$uni &lt;$emain&gt;";

&errmsg2("repdate must be specified") unless $repdate;
&errmsg2("repdate must be numeric: $repdate")
				if $repdate =~ /\D/;
&errmsg2("repdate must be eight digits: $repdate")
				unless length($repdate)==8;

$repdir="$basedir/$server";
$baseurl.="/$server";
chop($webdir) if $webdir =~ m|/$|;	# remove trailing slash
$webdir = '/' . $webdir unless $webdir =~ m|^/|; # add leading slash
@wdirs = split('/', $webdir);		# count the slashes in webdir
$detail = $#wdirs if $detail < $#wdirs;	# increase detail if necessary
undef(@wdirs);
$webdir='' if $webdir eq '/';

if ($reptype eq 'dom') {
  $sortkey='req';
  $detail=2 if $detail > 2;
  $baseurl .= '/dom';
  $repdir  .= '/dom';
}
if (($server ne 'www') && ($server ne 'www1') && ($server ne 'wwwc')) {
  $honly='on';				# other servers only have these
  $sortkey='req';
  $detail=4 if $detail > 4;
}
$reptype = 'htr' if ($reptype eq 'dtr') && $honly;

if (!$webdir) {				# user wants FULL REPORT: DTR DOM RED
  if ($reptype eq 'red') {
    print "Location: $baseurl/$repdate.redir.html\n\n";
  } else {		# URL of the form /httpd/reports/19970816.dtrreq3.html
    print "Location: $baseurl/$repdate.$reptype$sortkey$detail.html\n\n";
  }
  exit;
}

if ($reptype eq 'dom') {		# user wants partial domain report
  $detail=2;
  # construct URL of the form /httpd/reports/cu/help/19970816.domreq2.html
  print "Location: $baseurl$webdir/$repdate.$reptype$sortkey$detail.html\n\n";
  exit;
}

print "Content-type: text/html\n\n";	# user wants partial dir tree report

&errmsg('detail must be at least 1')           if $detail < 1;
&errmsg('detail must not be greater than 6')   if $detail > 6;

# construct file name of the form "19950114.dtrreq3.html"
if ($reptype eq 'red') {
  $repfilnam = "$repdir/$repdate.redir.html";
} else {
  $repfilnam = "$repdir/$repdate.$reptype$sortkey$detail.html";
}
if (!open(REP, "<$repfilnam")) {
  &errmsg("Can't open report file $repfilnam: $!");
}

$visible=1;
$dtr_header='';
while (<REP>) {
  # skip the summary and the top 10 list
  # save the Directory Tree Report header in memory

  # must display icon from server this CGI script is running on
  #  to prevent Netscape warnings about mixing secure and unsecure data
  s|$ICON_REG|$ICON_OUT|o;
  s|$ICON_SEC|$ICON_OUT|o;
  if (/\<!-- end summary --\>/) {
    $visible=1;
    next;
  }
  $visible='' if /\<!-- begin summary --\>/;
  s/Directory Tree Report/$& for $webdir/;	# append dir name
  s/Redirect Report/$& for $webdir/;
  $dtr_header .= $_ if $visible;

  if ($visible && /------------------/) { # line after "Dir/file Req"
    $doffset = length($`);		# find starting column for URLs
    last;
  }
}

&errmsg("$webdir not found in $detail level report for $srv server")
		unless &extract_section($webdir, $dtr_header);

$visible='';
while (<REP>) {
    $visible=1  if /Created by /;
    print if $visible;
}
close REP;
exit;

########################################

sub extract_section {
    # print the relevant portion of the web usage report
    # returns 1 on success, '' otherwise

    local($webdir, $dheader) = @_;
    local($visible, $dirlevel, $offset, $tlev, $m, $j);
    local(@wdirs) = split('/', $webdir);

    # directory components are stored in the @wdirs array
    # e.g. /cu/help is stored this way:
    #    $wdir[1] = 'cu'
    #    $wdir[2] = 'help'


    $dirlevel = $#wdirs;		# levels specified by the user
    foreach $wdir (@wdirs) {
	$wdir .= '/';
    }

repline:
    while (<REP>) {
	last repline if m|^\</pre\>|;
	chop($rline=$_);
	# set tlev to the level of the current line (0 to 6)
	for ($tlev = 0; $tlev <= 6; $tlev++) {
	    local($rlev) = $tlev;
	    local($roffset) = 0;	# level 0, indent 0
	    if ($rlev) {		# level 1, indent 1
		$roffset++;
		$rlev--;
	    }
	    $roffset += $rlev*3;	# indent 3 for each level past 1
	    $offset = $doffset+$roffset;
	    if (substr($rline, $offset, 1) ne ' ') {
		$dirname[$tlev] = substr($rline, $offset);
		last;
	    }
	}
	if ($visible) {
	    if (($tlev < $dirlevel) ||
		(($tlev == $dirlevel) && ($wdirs[$tlev] ne $dirname[$tlev]))) {
		last repline;		# we are done
	    }
	    print "$rline\n";
	} else {
	    next repline if $tlev != $dirlevel; # keep looking
	    $m = 1;
	    for ($j = 1; $j <= $dirlevel; $j++) {
		if ($wdirs[$j] ne $dirname[$j]) {
		    $m = 0;
		    last;
		}
	    }
	    if ($m == 1) {		# found beginning of section
		$visible = 1;
		print $dheader;
		print "$rline\n";
	    }
	}
    }
    return $visible;
} # extract_section



sub errmsg {				# print an error message
    local($mess) = @_;
    print "<head><title>Showstats Error</title></head>
<body><h1>Showstats Error</h1>
$mess</body>";
    exit;
}



sub errmsg2 {				# header + error message
    local($mess) = @_;
    print "Content-type: text/html\n
<head><title>Showstats Error</title></head>
<body><h1>Showstats Error</h1>
$mess</body>";
    exit;
}
