head	1.5;
access;
symbols;
locks; strict;
comment	@# @;


1.5
date	2001.02.20.20.56.21;	author beecher;	state Exp;
branches;
next	1.4;

1.4
date	99.11.23.03.31.58;	author beecher;	state Exp;
branches;
next	1.3;

1.3
date	99.11.23.03.29.06;	author beecher;	state Exp;
branches;
next	1.2;

1.2
date	99.04.04.19.08.41;	author beecher;	state Exp;
branches;
next	1.1;

1.1
date	99.03.30.16.22.33;	author beecher;	state Exp;
branches;
next	;


desc
@get full or partial web usage report
@


1.5
log
@don't call getconfig because we don't need it
@
text
@#!/usr/bin/perl
# showstats

# if reptype is '' or 'dtr' - dir tree report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server, detail, honly, sortkey
#
# if reptype is 'dom' - domain report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server, detail
#
# if reptype is 'red' - redirect report
# complete report if webdir is null, otherwise extract the relevant dir
# also accepts repdate, server

$ICON_REG='http://www.columbia.edu/httpd/getstats/getstats.gif';
$ICON_SEC='https://www1.columbia.edu/icons/getstats.gif';

$SRVPORT=$ENV{'SERVER_PORT'};		# get server's port number
if ($SRVPORT eq '443') {		# secure server
  $ICON_OUT=$ICON_SEC;
  $basedir='/etc/httpd/data/sec/acis/rad/web-usage';
  $baseurl='https://www1.columbia.edu/sec/acis/rad/web-usage';
} else {				# regular server
  $ICON_OUT=$ICON_REG;
  $basedir='/etc/httpd/data/httpd/reports';
  $baseurl='http://www.columbia.edu/httpd/reports';
}

					# set defaults
$reptype='dtr';
$server='www';				# default server
$repdate='';
$webdir='';
$detail=1;
$honly='';
$sortkey='req';				# alp or req
$debug='';

if ($ENV{'REQUEST_METHOD'} eq 'GET') {	# get the input using method=GET
  $buffer = $ENV{'QUERY_STRING'};
} else {				# get the input using method=POST
  read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}
@@pairs = split(/&/, $buffer);		# split the name-value pairs

foreach $pair (@@pairs) {		# put values into scalar variables
  ($name, $value) = split(/=/, $pair);

  # Un-Webify plus signs, undo the %-encoding
  $value =~ tr/+/ /;
  $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

  if ($name eq 'reptype') {		# report type: dtr, dom, red
    $reptype = $value;
  } elsif ($name eq 'server') {		# web server short name
    $server = $value;
  } elsif ($name eq 'repdate') {	# 8 digit report date
    $repdate = $value;
  } elsif ($name eq 'webdir') {		# desired directory
    $webdir = $value;
  } elsif ($name eq 'detail') {		# levels of detail
    $detail = $value;
  } elsif ($name eq 'honly') {		# count HTML files only
    $honly = $value;
  } elsif ($name eq 'sortkey') {	# alp or req
    $sortkey = $value;
  } elsif ($name eq 'debug') {
    $debug = $value;
  } else {
    &errmsg2("not a supported option: $name=$value");
  }
}

#$uni=$ENV{'REMOTE_USER'};		# Kerberos handle
#$emain=$ENV{'USER_EMAIL'};		# email address
#$person="$uni &lt;$emain&gt;";

&errmsg2("repdate must be specified") unless $repdate;
&errmsg2("repdate must be numeric: $repdate")
				if $repdate =~ /\D/;
&errmsg2("repdate must be eight digits: $repdate")
				unless length($repdate)==8;

$repdir="$basedir/$server";
$baseurl.="/$server";
chop($webdir) if $webdir =~ m|/$|;	# remove trailing slash
$webdir = '/' . $webdir unless $webdir =~ m|^/|; # add leading slash
@@wdirs = split('/', $webdir);		# count the slashes in webdir
$detail = $#wdirs if $detail < $#wdirs;	# increase detail if necessary
undef(@@wdirs);
$webdir='' if $webdir eq '/';

if ($reptype eq 'dom') {
  $sortkey='req';
  $detail=2 if $detail > 2;
  $baseurl .= '/dom';
  $repdir  .= '/dom';
}
if (($server ne 'www') && ($server ne 'www1') && ($server ne 'wwwc')) {
  $honly='on';				# other servers only have these
  $sortkey='req';
  $detail=4 if $detail > 4;
}
$reptype = 'htr' if ($reptype eq 'dtr') && $honly;

if (!$webdir) {				# user wants FULL REPORT: DTR DOM RED
  if ($reptype eq 'red') {
    print "Location: $baseurl/$repdate.redir.html\n\n";
  } else {		# URL of the form /httpd/reports/19970816.dtrreq3.html
    print "Location: $baseurl/$repdate.$reptype$sortkey$detail.html\n\n";
  }
  exit;
}

if ($reptype eq 'dom') {		# user wants partial domain report
  $detail=2;
  # construct URL of the form /httpd/reports/cu/help/19970816.domreq2.html
  print "Location: $baseurl$webdir/$repdate.$reptype$sortkey$detail.html\n\n";
  exit;
}

print "Content-type: text/html\n\n";	# user wants partial dir tree report

&errmsg('detail must be at least 1')           if $detail < 1;
&errmsg('detail must not be greater than 6')   if $detail > 6;

# construct file name of the form "19950114.dtrreq3.html"
if ($reptype eq 'red') {
  $repfilnam = "$repdir/$repdate.redir.html";
} else {
  $repfilnam = "$repdir/$repdate.$reptype$sortkey$detail.html";
}
if (!open(REP, "<$repfilnam")) {
  &errmsg("Can't open report file $repfilnam: $!");
}

$visible=1;
$dtr_header='';
while (<REP>) {
  # skip the summary and the top 10 list
  # save the Directory Tree Report header in memory

  # must display icon from server this CGI script is running on
  #  to prevent Netscape warnings about mixing secure and unsecure data
  s|$ICON_REG|$ICON_OUT|o;
  s|$ICON_SEC|$ICON_OUT|o;
  if (/\<!-- end summary --\>/) {
    $visible=1;
    next;
  }
  $visible='' if /\<!-- begin summary --\>/;
  s/Directory Tree Report/$& for $webdir/;	# append dir name
  s/Redirect Report/$& for $webdir/;
  $dtr_header .= $_ if $visible;

  if ($visible && /------------------/) { # line after "Dir/file Req"
    $doffset = length($`);		# find starting column for URLs
    last;
  }
}

&errmsg("$webdir not found in $detail level report for $srv server")
		unless &extract_section($webdir, $dtr_header);

$visible='';
while (<REP>) {
    $visible=1  if /Created by /;
    print if $visible;
}
close REP;
exit;

########################################

sub extract_section {
    # print the relevant portion of the web usage report
    # returns 1 on success, '' otherwise

    local($webdir, $dheader) = @@_;
    local($visible, $dirlevel, $offset, $tlev, $m, $j);
    local(@@wdirs) = split('/', $webdir);

    # directory components are stored in the @@wdirs array
    # e.g. /cu/help is stored this way:
    #    $wdir[1] = 'cu'
    #    $wdir[2] = 'help'


    $dirlevel = $#wdirs;		# levels specified by the user
    foreach $wdir (@@wdirs) {
	$wdir .= '/';
    }

repline:
    while (<REP>) {
	last repline if m|^\</pre\>|;
	chop($rline=$_);
	# set tlev to the level of the current line (0 to 6)
	for ($tlev = 0; $tlev <= 6; $tlev++) {
	    local($rlev) = $tlev;
	    local($roffset) = 0;	# level 0, indent 0
	    if ($rlev) {		# level 1, indent 1
		$roffset++;
		$rlev--;
	    }
	    $roffset += $rlev*3;	# indent 3 for each level past 1
	    $offset = $doffset+$roffset;
	    if (substr($rline, $offset, 1) ne ' ') {
		$dirname[$tlev] = substr($rline, $offset);
		last;
	    }
	}
	if ($visible) {
	    if (($tlev < $dirlevel) ||
		(($tlev == $dirlevel) && ($wdirs[$tlev] ne $dirname[$tlev]))) {
		last repline;		# we are done
	    }
	    print "$rline\n";
	} else {
	    next repline if $tlev != $dirlevel; # keep looking
	    $m = 1;
	    for ($j = 1; $j <= $dirlevel; $j++) {
		if ($wdirs[$j] ne $dirname[$j]) {
		    $m = 0;
		    last;
		}
	    }
	    if ($m == 1) {		# found beginning of section
		$visible = 1;
		print $dheader;
		print "$rline\n";
	    }
	}
    }
    return $visible;
} # extract_section



sub errmsg {				# print an error message
    local($mess) = @@_;
    print "<head><title>Showstats Error</title></head>
<body><h1>Showstats Error</h1>
$mess</body>";
    exit;
}



sub errmsg2 {				# header + error message
    local($mess) = @@_;
    print "Content-type: text/html\n
<head><title>Showstats Error</title></head>
<body><h1>Showstats Error</h1>
$mess</body>";
    exit;
}
@


1.4
log
@update for Solaris 7
@
text
@a15 4
unshift(@@INC, '/opt/ACISweb/lib');
require 'webconfig.pm';
&webconfig'getconfig('', \@@sname, \%lname, \%stype, \%droot, \%host1, \%host2);

@


1.3
log
@several changes
@
text
@d1 1
a1 1
#!/usr/local/bin/perl
d26 1
a26 1
  $basedir='/www/data/sec/acis/rad/web-usage';
d30 1
a30 1
  $basedir='/www/data/httpd/reports';
@


1.2
log
@recognize new summary report format on dir tree reports
@
text
@d6 1
a6 1
# also accepts repdate (or weeknum), server, detail, honly, sortkey
d16 3
a18 62
$DEFSERV='columbia';
$WEEKLIST='/www/data/httpd/reports/weeklist';

# use the statmap array to identify:
#	the server name
#	users authorized to see those reports (NULL means all users)
#	directory that contains the reports
#	URL of those reports

@@statmap = (
	['columbia', '',
	   '/www/data/httpd/reports/columbia',
	      'http://www.columbia.edu/httpd/reports/columbia'],
	['kermit', '',
	   '/www/data/httpd/reports/kermit',
	      'http://www.columbia.edu/httpd/reports/kermit'],
	['goaskalice', '',
	   '/www/data/httpd/reports/goaskalice',
	      'http://www.columbia.edu/httpd/reports/goaskalice'],
	['alice', '',
	   '/www/data/httpd/reports/alice',
	      'http://www.columbia.edu/httpd/reports/alice'],
	['ciao', '',
	   '/www/data/httpd/reports/ciao',
	      'http://www.columbia.edu/httpd/reports/ciao'],
	['ce', '',
	   '/www/data/httpd/reports/ce',
	      'http://www.columbia.edu/httpd/reports/ce'],
	['europanet', '',
	   '/www/data/httpd/reports/europanet',
	      'http://www.columbia.edu/httpd/reports/europanet'],
	['najp', '',
	   '/www/data/httpd/reports/najp',
	      'http://www.columbia.edu/httpd/reports/najp'],
	['wilder', '',
	   '/www/data/httpd/reports/wilder',
	      'http://www.columbia.edu/httpd/reports/wilder'],
	['wildones', '',
	   '/www/data/httpd/reports/wildones',
	      'http://www.columbia.edu/httpd/reports/wildones'],
	['worldmon', '',
	   '/www/data/httpd/reports/worldmon',
	      'http://www.columbia.edu/httpd/reports/worldmon'],

	# allow -beecher-dsm-ariel-gd17-walter-lynn-ggs2-
	['www1', '-hb24-dsm5-ariel0-gd17-shv1-wmb2-mej2-ggs2-',
	   '/wwws/data/acis/rad/web-usage/www1',
	      'https://www1.columbia.edu/sec/acis/rad/web-usage/www1'],

	# allow -beecher-dsm-ariel-gd17-walter-lynn-bk145-
	['wwwc', '-hb24-dsm5-ariel0-gd17-shv1-wmb2-mej2-bk145-',
	   '/wwws/data/acis/rad/web-usage/wwwc',
	      'https://www1.columbia.edu/sec/acis/rad/web-usage/wwwc']
	);

# use the secmap array to restrict users to a given directory
#  these restrictions only apply to the www1 server and the wwwc server
@@secmap = (
	['-bk145-',		'/sec/dlc/ciao'],
	['-ggs2-',		'/sec/cu/sipa/GULF2000'],
	);

d26 2
d30 2
d36 1
a36 1
$server=$DEFSERV;			# default server
a37 1
$weeknum='';
d41 1
a41 1
$sortkey='req';				# alp or req (byt no longer supported)
d52 1
a52 1
    ($name, $value) = split(/=/, $pair);
d54 23
a76 58
    # Un-Webify plus signs, undo the %-encoding
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

    if ($name eq 'reptype') {		# report type: dtr, dom, red
	$reptype = $value;
    } elsif ($name eq 'server') {
	$server = $value;
    } elsif ($name eq 'repdate') {	# 8 digit report date
	$repdate = $value;
    } elsif ($name eq 'weeknum') {	# 0=current week, 1=one week ago
	$weeknum = $value;		# (for back compatibility)
    } elsif ($name eq 'webdir') {	# desired directory
	$webdir = $value;
    } elsif ($name eq 'detail') {	# levels of detail
	$detail = $value;
    } elsif ($name eq 'honly') {	# count HTML files only
	$honly = $value;
    } elsif ($name eq 'sortkey') {	# alp or req
	$sortkey = $value;
    } elsif ($name eq 'showsum') {	# include summary (no longer used)
	$showsum = $value;
    } elsif ($name eq 'debug') {
	$debug = $value;
    } else {
	print "Content-type: text/html\n
unexpected variable: $name=$value<br>\n";
    }
}

$uni=$ENV{'REMOTE_USER'};		# Kerberos handle
$emain=$ENV{'USER_EMAIL'};		# email address
$person="$uni &lt;$emain&gt;";

$secure_server = ($server eq 'www1') || ($server eq 'wwwc');
for $statrow (@@statmap) {		# look up server name to get report dir
    ($srv, $uzerz, $repdir, $baseurl) = (@@$statrow); # and authorized user list
    last if $srv eq $server;
}
&errmsg2("undefined server: $server") unless $srv eq $server;

for $secrow (@@secmap) {			# check further restrictions, per user
    ($clients, $clidir) = (@@$secrow);
    last if $clients =~ /-$uni-/;
}

if ($repdate) {
    $weeknum='';
    &errmsg2("repdate must be numeric: $repdate")
					if $repdate =~ /\D/;
    &errmsg2("repdate must be eight digits: $repdate")
					unless length($repdate)==8;
} else {
    &errmsg('weeknum must not be negative') if $weeknum < 0;
    $weeknum='0' unless $weeknum;
    $repdate = &get_repdate($weeknum);	# translate week number to date string
    &errmsg("no web usage report on the $srv server from $weeknum weeks ago")
						unless $repdate;
d79 9
d89 2
d99 9
a107 9
    $sortkey='req';
    $detail=2 if $detail > 2;
    $baseurl .= '/dom';
    $repdir  .= '/dom';
}
if (($server ne 'columbia') && ($server ne 'www1') && ($server ne 'wwwc')) {
    $honly='on';			# other servers only have these
    $sortkey='req';
    $detail=4 if $detail > 4;
d112 6
a117 8
    &errmsg2("user $person is not authorized to see full reports on server $server")
	if ($secure_server) && ($clients =~ /-$uni-/);
    if ($reptype eq 'red') {
	print "Location: $baseurl/$repdate.redir.html\n\n";
    } else {		# URL of the form /httpd/reports/19970816.dtrreq3.html
	print "Location: $baseurl/$repdate.$reptype$sortkey$detail.html\n\n";
    }
    exit;
d121 4
a124 4
    $detail=2;
    # construct URL of the form /httpd/reports/cu/help/19970816.domreq2.html
    print "Location: $baseurl$webdir/$repdate.$reptype$sortkey$detail.html\n\n";
    exit;
a131 9
&errmsg("user $person is not authorized to see reports for server $srv")
		unless (!$uzerz) || $uzerz =~ m/-$uni-/;

		# this user restricted to specific directories on secure servers
if (($secure_server) && ($clients =~ /-$uni-/)) {
    &errmsg("user $person is only authorized to see reports for directory
    $clidir on server $server") unless $webdir =~ m|^$clidir|;
}

d134 1
a134 1
    $repfilnam = "$repdir/$repdate.redir.html";
d136 1
a136 1
    $repfilnam = "$repdir/$repdate.$reptype$sortkey$detail.html";
d139 1
a139 1
    &errmsg("Can't open report file $repfilnam: $!");
a178 21

# read the weeklist file to map weeknum into repdate
# returns the 8 digit report date, or NULL if not found
sub get_repdate {
    local($weeknumber) = @@_;
    local($thisweek) = 0;
    if (!open(DATES, "<$WEEKLIST")) {
	&errmsg("Can't open $WEEKLIST: $!");
    }
    while (<DATES>) {
	if ($weeknumber == $thisweek) {
	    chop;
	    return $_;			# success
	}
	$thisweek++;
    }
    close DATES;
    return '';				# not found
}


@


1.1
log
@Initial revision
@
text
@d60 1
a60 1
	# allow -beecher-dsm-ariel-gd17-svl2-walter-lynn-ggs2-
d65 2
a66 2
	# allow -beecher-dsm-ariel-gd17-svl2-walter-lynn-bk145-lse6-
	['wwwc', '-hb24-dsm5-ariel0-gd17-shv1-wmb2-mej2-bk145-lse6-',
d84 1
a84 1
    $ICON_OUT=$ICON_SEC;
d86 1
a86 1
    $ICON_OUT=$ICON_REG;
d100 4
a103 4
if ($ENV{'REQUEST_METHOD'} eq 'GET') {	# get the input using method=get
    $buffer = $ENV{'QUERY_STRING'};
} else {				# get the input using method=post
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
d236 2
a237 2
    # skip the summary and the top 10 list
    # save the Directory Tree Report header in memory
d239 17
a255 13
    # must display icon from server this CGI script is running on
    #  to prevent Netscape warnings about mixing secure and unsecure data
    s|$ICON_REG|$ICON_OUT|o;
    s|$ICON_SEC|$ICON_OUT|o;
    $visible='' if /Summary Report/;
    $visible=1  if s/Directory Tree Report/$& for $webdir/;
    $visible=1  if s/Redirect Report/$& for $webdir/;
    $dtr_header .= $_ if $visible;

    if ($visible && /------------------/) { # line after "Dir/file Req"
	$doffset = length($`);
	last;
    }
@
