#!/usr/local/web/bin/perl -ws ### ---------------------------------------------------------------------- ### # # Marty's Quick-n-Dirty link checker # # Created: 4 June 1996 # Last Modified: 7 August 1997 # # Author: Martin Gleeson # # (c) Copyright, The University of Melbourne, 1996-1997 # # Checks all non-local URLs on a page # require 5.003; use LWP::UserAgent; $| = 1; $v = 1 if($v); $0 =~ s/(.+)\/([^\/]+)/$2/g; print STDERR "Usage: $0 [-v] file [file 2 .. file n]\n" if(!$ARGV[0]); exit(1) if(!$ARGV[0]); # parse page foreach $page (@ARGV) { open PAGE, $page or die "Couldn't open $page: $!"; print "Opened $page\n" if $v; @lines = ; $html = join('',@lines); $html =~ s/\n//g; # make it one long line print "Page is [$html]\n" if $v; $_ = $html; @urls = m/(\w+\:\/\/[\w\.]+\/?[^\"\>\ \&\<]*)/g; # grab all URLs # into an array printf "%-78s %8s %30s %5s %5s\n", "URL", "Bytes", "Last Modified", "Code", "Type"; printf "%-78s %8s %30s %5s %5s\n", "------------------------------------------------------------------------------", "--------", "------------------------------", "-----", "-----"; foreach $url (@urls) { if( ! $all_urls{$url}) # no need to check the same URL twice { $all_urls{$url} = 1; print "Checking URL: $url\n" if $v; &URLget($url); # $result = &URLget($url); # get and print if error # print $result if(($result =~ /> Error/) || $v); } } } exit(0); ### ---------------------------------------------------------------------- ### sub URLget{ local($URL) = @_; local($ret,$lastmod,$message); # Create a user agent object $ua = new LWP::UserAgent; $ua->agent("Marty's qnd-link-check.pl/1.0 " . $ua->agent); # un-comment this line and change the proxy if you want to use one # $ua->proxy(['http', 'ftp'], 'http://wwwproxy.unimelb.edu.au:8000/'); # Create a request my $req = ($URL =~ /^ftp/i) ? new HTTP::Request GET => "$URL" : new HTTP::Request HEAD => "$URL"; # Pass request to the user agent and get a response back my $res = $ua->request($req); $lastmod = ($res->is_success) ? $res->header('Last-Modified') : $res->message; $message = ($res->is_success) ? $res->message : ""; printf "%-78s ", $URL; ($res->header('Content-Length') ) ? printf "%8s ", $res->header('Content-Length') : printf "%8s ", " "; ($lastmod) ? printf "%30s ", $lastmod : printf "%30s ", " "; ($res->code) ? printf "%5s ", $res->code : printf "%5s ", " "; ($message) ? printf "%5s\n", $message : printf "%5s\n", " "; return $ret; } ### ---------------------------------------------------------------------- ###