#!/usr/bin/perl
# bad-links.pl
#
# Read the error list and create the error reports.
# If the -s argument is specified, error reports are sent to individual
# users via email; otherwise the reports are only printed to standard output.
#
# Overall flow:
#   1. write_temp_file reads the error list, looks up the owner of every
#      referring file and writes one tab-separated record per error to $TEMP.
#   2. $TEMP is piped through sort so records arrive grouped by
#      group / user / referring page / bad URL.
#   3. The main loop walks the sorted records and calls the new_* routines
#      whenever one of those keys changes, printing (and optionally mailing)
#      the report.
#
# The new_* routines communicate through package globals ($group, $user,
# $person, $req, $url, $ecode, $ecount, $time1, $time2, $mopen, ...), which
# the main loop sets for each record before calling them.

$DOCROOT   = '/www/data';
$TEMP      = '/tmp/bad-links.tmp';
$ADMINUSER = 'beecher';    # send root & unknown reports to this user
$ERRCOLUMN = 58;

# users who said "no, thanks" (they will not get any error reports)
$no_thanks = '-thor-mo107-jmg72-bcw6-mgz1-chh1-ly38-';

# chomp (not chop) so a result without a trailing newline is not truncated.
chomp($SORT = `which sort`);
# Some `which` implementations print "no sort in ..." on failure, others
# print nothing at all; treat both as "not found".
die("can't find sort\n") if $SORT eq '' || $SORT =~ /^no /;

$MAILPROG = '/usr/lib/sendmail';
die("$MAILPROG not found\n") unless -x $MAILPROG;

$send = '';
while ($ARGV[0]) {
    if ($ARGV[0] eq '-s') {    # -s means send mail
        shift;
        $send = 1;
    }
    else {
        print "unexpected option: @ARGV\n";
        exit;
    }
}

write_temp_file();
get_header();

$prev_group  = '';
$prev_user   = '';
$prev_req    = '';
$prev_url    = '';
$prev_ecode  = '';
$prev_ecount = '';
$mopen       = '';    # true while a pipe to $MAILPROG is open on MAIL
$mailsent    = 0;

# List-form pipe open: runs sort directly, without passing through a shell.
if (!open(SOR, '-|', $SORT, $TEMP)) {
    die "can't sort $TEMP: $!\n";
}

while (<SOR>) {
    chomp;
    ($group, $user, $person, $req, $url, $ecode, $ecount, $time1, $time2) =
        split /\t/;

    # Each new_* routine chains down to the more specific ones, so only the
    # outermost key that changed needs to be dispatched here.
    if ($group ne $prev_group) {
        new_group();
    }
    elsif ($user ne $prev_user) {
        new_user();
    }
    elsif ($req ne $prev_req) {
        new_req();
    }
    elsif ($url ne $prev_url) {
        new_url();
    }
    else {
        new_ecode();
    }

    $prev_group = $group;
    $prev_user  = $user;
    $prev_req   = $req;
    $prev_url   = $url;
}
close(SOR);
#unlink($TEMP);

if ($mopen) {
    close(MAIL);
    $mopen = '';
}
print STDERR "\n$mailsent messages sent\n";

########################################

# write_temp_file
#
# Read the error file created by getstats and write $TEMP.  The error list
# is read from standard input (option parsing above always leaves @ARGV
# empty).  Here is a sample record (fields are tab-separated):
#
#   /acis/rad/index.html  /~beecher/99503.html  404  2  20/Dec/1997:11:36  20/Dec/1997:14:30
#
# For every record the owner of the referring file under $DOCROOT is looked
# up, and a line "group, user, person, req, url, rest" (tab-separated) is
# written to $TEMP.  Records whose referring page or target is a CGI path,
# whose target is a mailto: link, or whose referring file cannot be stat'ed
# are counted and skipped.  A summary of the counts goes to STDERR.
# Dies if $TEMP cannot be created or fully written.
sub write_temp_file {
    my ($fromcgi, $tocgi, $mailto, $badfile, $goodfile) = (0, 0, 0, 0, 0);

    if (!open(TMP, '>', $TEMP)) {
        die "can't create $TEMP: $!\n";
    }

    while (<STDIN>) {
        chomp;
        my ($req, $url, $rest) = split(/\t/, $_, 3);

        if (($req =~ m|^/cgi-bin|) ||        # ignore bad links from CGI
            ($req =~ m|^/sec-cgi-bin|)) {
            $fromcgi++;
            next;
        }
        if (($url =~ m|^/cgi-bin|) ||        # ignore bad links to CGI
            ($url =~ m|^/sec-cgi-bin|)) {
            $tocgi++;
            next;
        }
        if ($url =~ /mailto:/) {             # ignore links to mailto:
            $mailto++;
            next;
        }

        my @fileinfo = stat("$DOCROOT$req"); # stat the file
        my $group    = 'Unknown';
        my $user     = $ADMINUSER;
        my $person   = 'Unknown';

        if ($fileinfo[1] &&                  # file exists
            ($fileinfo[4] != -1)) {          # uid is not -1
            my @userinfo = getpwuid($fileinfo[4]);    # username and group

            # Only consult the passwd fields when the uid actually has an
            # entry; otherwise getgrgid(undef) would silently look up gid 0.
            if (@userinfo) {
                $user = $userinfo[0] if $userinfo[0];

                my $gname = getgrgid($userinfo[3]);
                $group = $gname if defined $gname && $gname ne '';

                # First comma-separated part of the GECOS field.
                $person = (split(',', $userinfo[6], 2))[0];
                # NOTE(review): as written this replaces a space with a
                # space (a no-op); presumably it was meant to normalise some
                # other whitespace -- confirm the intended pattern.
                $person =~ s/ / /g if defined $person;
            }
            $person = 'Unknown' unless $person;

            print TMP join("\t", $group, $user, $person, $req, $url, $rest)
                . "\n";
            $goodfile++;
        }
        else {
            $badfile++;
        }
    }

    # Buffered write errors only surface at close time.
    close(TMP) || die "can't write $TEMP: $!\n";

    print STDERR "$fromcgi links from CGI files ignored\n";
    print STDERR "$tocgi links to CGI files ignored\n";
    print STDERR "$mailto mailto links ignored\n";
    print STDERR "$badfile files not found\n";
    print STDERR "$goodfile files included in report\n";
}

# new_group
#
# Print the banner for the current $group, then start the first user of
# that group.
sub new_group {
    print "\n\n-------- GROUP $group --------";
    new_user();
}

# new_user
#
# Close any mail message still open for the previous user and start the
# report for the current $user.  When -s was not given, or the user is
# listed in $no_thanks, the report is only printed.  Otherwise a pipe to
# $MAILPROG is opened on MAIL, the message headers and $header text are
# written, $mopen is set and $mailsent is incremented.  Reports for root are
# addressed to $ADMINUSER.  Dies if $MAILPROG cannot be started.
sub new_user {
    if ($mopen) {
        close(MAIL);
        $mopen = '';
    }

    if (!$send ||                            # -s command line option
        ($no_thanks =~ /-\Q$user\E-/)) {     # no mail for these users
        print "\n\nReport for $person <$user>\n";
        new_req();
        return;
    }

    my $muser = $user;
    $muser = $ADMINUSER if $user eq 'root';
    print "\n\nSending mail to $person <$muser>\n";

    # List-form pipe open: no shell is involved in starting the mailer.
    die("can't run $MAILPROG: $!\n")
        unless open(MAIL, '|-', $MAILPROG, '-t');
    $mopen = 1;
    $mailsent++;

    # Each header must be on its own line and a blank line must separate the
    # headers from the body, or "sendmail -t" cannot parse the recipient.
    print MAIL "To: $muser\n",
               "Subject: web error report\n",
               "\n",
               "$header\n",
               "*** Web Error Report for $person <$user> ***\n";

    new_req();
}

# new_req
#
# Print the referring page ($req with any "#fragment" removed) and continue
# with its first bad URL.
sub new_req {
    my $request = $req;
    $request =~ s/#.*//;

    print MAIL "\n$request\n" if $mopen;
    print "\n$request\n";
    new_url();
}

# new_url
#
# Print the bad link target ($url) and continue with its first error code.
sub new_url {
    my $msg = " bad link to $url\n";

    print MAIL $msg if $mopen;
    print $msg;
    new_ecode();
}

# new_ecode
#
# Print one error line for the current record: a description of the HTTP
# status in $ecode (the raw code itself when it is not one of the known
# ones), followed by either the single time of occurrence or the count and
# the first/last times.
sub new_ecode {
    my %text_for = (
        '400' => 'Bad syntax',
        '401' => 'Access denied',
        '403' => 'Protected',
        '404' => 'Not found',
        '405' => 'Method not allowed',
        '408' => 'Timeout',
        '500' => 'Server error',
        '501' => 'Not implemented',
    );

    # Unknown codes are reported verbatim.
    my $errmsg = exists $text_for{$ecode} ? $text_for{$ecode} : $ecode;

    my $line;
    if ($ecount == 1) {
        $line = " $errmsg at $time1\n";
    }
    else {
        $line = " $errmsg [$ecount times] $time1 to $time2\n";
    }

    print MAIL $line if $mopen;
    print $line;
}

# get_header
#
# Set $header, the introductory text placed at the top of every mailed
# report.
sub get_header {
    # you can put informative messages here
    $header = "\n"
            . "*** About the Weekly Error Report for Web Pages ***\n"
            . "\n"
            . "Here is your web error report for the past week.\n";
}