#!/usr/bin/perl ### ---------------------------------------------------------------------- ### # # my-url-minder.pl # # A reproduction of the Netmind URL-Minder service # http://minder.netmind.com/index.html # # This lets you take part of the page out with a regexp # so lame things like SSI counters don't cause you to get # notifications every time the counter ticks over. # # It uses a MD5 checksum of the cut-down page as a basis for # comparison. # # (c) Copyright, Martin Gleeson, 4 August 1998 ### ---------------------------------------------------------------------- ### use strict; use Getopt::Long; my ($prog) = $0 =~ /([^\/]+)$/; my ($db, %opts); my $version = "1.0"; &GetOptions(\%opts, "check", "add", "url:s", "exp:s", "v","d"); my $verbose = 1 if $opts{'v'}; my $debug = 1 if $opts{'d'}; my $blug = " " x length($prog); my $usage = < [-exp ] Add to database, optionally excluding $blug portion of the page matching $blug (adds to record if $blug is already in database) EOF ### ---------------------------------------------------------------------- ### ### User configuration follows ### my $data_dir = "$ENV{'HOME'}/doc/url-minder"; ### End of user configuration - no changes required below this line ### ### ---------------------------------------------------------------------- ### if($opts{'check'}) { my (@lines, $change); # Read in the database $db = "$data_dir/url_data.txt"; open DB, $db or die "Couldn't open database [$db] for reading: $!\n"; while() { chomp; my $exp; my @fields = split(/\t/); my $url = $fields[0]; my $md5 = $fields[1]; my @reg_exps = @fields[2 .. $#fields]; print "$prog: Checking [$url]\n" if $verbose; print "$prog: \tMD5 Digest is [$md5]\n" if $verbose; my $page = &get($url); if($page =~ m/%%ERROR%%/) { print "$prog: Error in retrieving [$url] - skipping it.\n"; next; } foreach $exp (@reg_exps) { print "$prog: \tRemoving expression [$exp] from page\n" if $verbose; $page =~ s#$exp##; } print "$prog: \tPage is [$page]\n" if $debug; my $digest = &md5($page); if($digest ne $md5) { print "$prog: \tNew MD5 Digest is [$digest]\n" if $verbose; print "$prog: URL [$url] has changed!!\n"; $fields[1] = $digest; $change = 1; } else { print "$prog: \tNew MD5 Digest is [$digest]\n" if $verbose; print "$prog: URL [$url] has NOT changed!!\n" if $verbose; } my $line = join("\t",@fields); push(@lines,"$line\n"); } close DB; if($change) { my $line; open DB, ">$db" or die "Couldn't open database [$db] for writing: $!\n"; foreach $line (@lines) { print DB $line; } close DB; } } elsif($opts{'add'}) { if(! $opts{'url'}) { print "$prog: Error! Must specify url when adding.\n"; print $usage; exit 1; } my (@lines, $change); # Read in the database $db = "$data_dir/url_data.txt"; if(! -e $db) { $lines[0] = "$opts{'url'}" . "\t"; $lines[0] .= "\t" . $opts{'exp'} if $opts{'exp'}; $lines[0] .= "\n"; $change = 1; } else { open DB, $db or die "Couldn't open database [$db] for reading: $!\n"; while() { chomp; my @fields = split(/\t/); my $url = $fields[0]; my @reg_exps = @fields[2 .. $#fields]; if($url eq $opts{'url'}) { if(! $opts{'exp'}) { print "$prog: Error! Must specify regular expression when adding to existing url.\n"; print $usage; exit 1; } push(@reg_exps,$opts{'exp'}); $change = 1; } my $line = join("\t", $fields[0], $fields[1], @reg_exps); push(@lines, "$line\n"); } if(! $change) { $change = 1; my $line = "$opts{'url'}" . "\t"; $line .= "\t" . $opts{'exp'} if $opts{'exp'}; push(@lines, "$line\n"); } } if($change) { my $line; open DB, ">$db" or die "Couldn't open database [$db] for writing: $!\n"; foreach $line (@lines) { print DB $line; } close DB; } } else { print $usage; } exit 0; ### ---------------------------------------------------------------------- ### sub get { my $URL = pop(@_); my $ret; use LWP::UserAgent; # Create a user agent object my $ua = new LWP::UserAgent; $ua->agent("$prog/$version " . $ua->agent); # Create a request my $req = new HTTP::Request GET => "$URL"; # Accept all data types. This is specifically for Microsloth's # IIS, which won't properly default to */* and throws a 406. $req->header('Accept' => '*/*'); # Pass request to the user agent and get a response back my $res = $ua->request($req); # Check the outcome of the response if ($res->is_success) { $ret = $res->content; } else { $ret = '%%%ERROR%%%'; } return $ret; } ### ---------------------------------------------------------------------- ### sub md5 { my $text = pop(@_); my ($md5, $digest); use MD5; $md5 = new MD5; $md5->add($text); $digest = $md5->digest(); return unpack("H*", $digest); } ### ---------------------------------------------------------------------- ### sub time_now { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst); my %months = ( '0','Jan', '1','Feb', '2','Mar', '3','Apr', '4','May', '5','Jun', '6','Jul', '7','Aug', '8','Sep', '9','Oct', '10','Nov', '11','Dec'); my $now; ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; $hour = "0" . $hour if($hour < 10); $min = "0" . $min if($min < 10); $sec = "0" . $sec if($sec < 10); $now = "$hour:$min:$sec $mday $months{$mon} $year"; return $now; }