#! /usr/bin/perl

# tochtml: Create a Table of Contents of an HTML File

$* = 1;       # enable multi-line patterns

$tab = "   "; # what to repeat before each entry
$depth = 0;   # initial depth

$title = "<p><b>Table of Contents</b>";
$title = "";
$list = "UL";

print "<!-- This Table of Contents generated by tochtml -->\n";
print "$title\n";

while (<>) {
	if (/<[Hh]([1-9]).*/) { # grab heading level and start heading
		$level = $1;
		$line = $_;
	} elsif ($line) { # append to the line we are gathering
		$line .= $_;
	}
	if ($line =~ /.*<\/[Hh]$level>/) { # heading ends on this line
		&printheading ($line, $level);
		$line = "";
	}
}
&printheading ("", 0); # end the headings

sub dup
{
	local ($s, $n) = (@_);
	while ($n > 0) {
		print $s;
		$n--;
	}
}

sub printheading { # line level
	local ($line, $level) = ($_[0], $_[1]);
	local ($name);
	$line =~ s/\s*<\/?h$level[^>]*>\s*//gi; # remove start and end <hN>
	$line =~ s/\s*<\/?font[^>]*>\s*//gi;    # remove start and end <font>
	$line =~ s/\s*<\/?center[^>]*>\s*//gi;    # remove start and end <font>
	$line =~ s/\s*<\/?hr[^>]*>\s*//gi;    # remove start and end <font>
	if ($line =~ /<[Aa]\s*.*name\s*=\s*"{0,1}([-_A-Za-z0-9.]+)"{0,1}/i) {
		$name = $1;
	}
	$line =~ s/<\/?a[^>]*>//gi;
	$line =~ s/\s+/ /g;                     # translate embedded spaces
	$line =~ s/\s*$//;                      # remove trailing whitespace
	if ($level > $depth) { # open one or more lists
		while ($depth != $level) {
			dup ($tab, $depth);
			print "<$list>\n";
			$depth++;
		}
	} elsif ($level < $depth) { # close one or more lists
		while ($depth != $level) {
			$depth--;
			dup ($tab, $depth);
			print "</$list>\n";
		}
	}
	dup ($tab, $level);
	if ($name) { # include link to named section
		print "<li><a href=\"#$name\">$line</a>\n";
	} elsif ($line) {
		print "<li>$line\n";
	}
}
