#! /usr/bin/perl

use LWP::Simple;                  # http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm

# Links to wikipedia are not checked because HEAD requests to them all return nothing.


# Link checker with two modes, selected by whether a file argument is given.
#
# Report mode (a true first argument): the named file is read via <> and is
# expected to hold records previously written by check mode.  The file name is
# echoed first, then every URL whose "type" field is empty (i.e. whose HEAD
# request returned nothing) is printed, one per line.
#
# Check mode (no argument): one URL per line is read from standard input,
# optionally prefixed with a "%W" field tag.  For each URL a blank-line
# terminated record is written with the fields url, type, length, modif,
# expire and server, as returned by LWP::Simple's head().
if ($ARGV[0]) {
	print "$ARGV[0]\n";
	my $url;                          # most recent "url" line seen
	while (<>) {
		if (/^url\s+(.*)$/) { # url checked
			$url = $1;
		} elsif (/^type\s+$/) { # empty type: the HEAD request failed
			print "$url\n";
		}
	}
} else {
	while (<>) { # a url on each line
		# chomp, not chop: a final line without a trailing newline must not
		# lose the last character of its URL.
		chomp;
		my $url = $_;
		# clean up url in %W field
		$url =~ s/^%W\s+//; # remove leading %W
		$url =~ s/\s+.*$//; # remove anything after url
		# Nothing to check on a blank line; skipping it avoids an empty
		# record that report mode would later list as a failed URL.
		next unless length $url;

		# Declared per iteration so that a URL which is not fetched (or
		# whose fetch fails) never inherits fields from the previous URL.
		my ($content_type, $document_length, $modified_time, $expires, $server);
		if ($url =~ /^(news|ftp|mailto)/) {
			# Not fetched; the matched prefix is recorded as the type so
			# the record is not reported as a failure.
			$content_type = $1;
		} elsif ($url =~ /(wikipedia\.org|oclc\.org|hcibib\.org)/) {
			# Not fetched; the matched host is recorded as the type.
			$content_type = $1;
		} else {
			# head() returns an empty list on failure, leaving every
			# field undefined (printed as empty below).
			($content_type, $document_length, $modified_time, $expires, $server) = head($url);
		}
		print "url\t$url\n";
		print "type\t$content_type\n";
		print "length\t$document_length\n";
		print "modif\t$modified_time\n";
		print "expire\t$expires\n";
		print "server\t$server\n";
		print "\n";
		# $doc = get($url); # FETCH THE URL
		# print $doc;
	}
}


