#! /usr/bin/perl

# tochtml: Create a Table of Contents of an HTML File

# TODO: add maxdepth param

$title = "<p><b>Table of Contents</b>";     # title for top of toc
$title = "";
$list = "UL";                               # list type tag
$item = "LI";                               # list item tag
$tab = "   ";                               # what to indent before entries

$* = 1;                                     # enable multi-line patterns
$depth = 0;                                 # initial depth

print "$title\n";                           # title for the table of contents

# main loop
while (<>) {                                # read all the files
	if (/<[Hh]([1-9]).*/) {                 # get heading level & start heading
		$level = $1;
		$line = $_;
	} elsif ($line) {                       # append to line if gathering
		$line .= $_;
	}
	if ($line =~ /.*<\/[Hh]$level>/) {      # heading ends on this line
		&printheading ($line, $level);
		$line = "";
	}
}
&printheading ("", 0);                      # end the headings

sub printheading {                          # line level
	local ($line, $level) = ($_[0], $_[1]); # <hN>.*</hN> and N
	local ($name);                          # anchor name in <hN>
	$line =~ s/\s*<\/?h$level[^>]*>\s*//gi; # remove start and end <hN>
	$line =~ s/\s*<\/?font[^>]*>\s*//gi;    # remove start and end <font>
	if ($line =~ /<[Aa]\s*.*name\s*=\s*"{0,1}([-A-Za-z0-9.]+)"{0,1}/i) {
		$name = $1;                         # grab name field from anchor in <hN>
	}
	$line =~ s/<\/?a[^>]*>//gi;             # remove anchor
	$line =~ s/\s+/ /g;                     # translate embedded spaces
	$line =~ s/\s*$//;                      # remove trailing whitespace
	if ($level > $depth) {                  # open one or more lists
		while ($depth != $level) {
			print "<$list>\n";
			$depth++;
		}
	} elsif ($level < $depth) {             # close one or more lists
		while ($depth != $level) {
			$depth--;
			print "</$list>\n";
		}
	}
	if ($line) {
		print "<$item>";     # indent and tag the toc item
		if ($name) {                        # include link to named section
			print "<a href=\"#$name\">$line</a>\n";
		} else {
			print "$line\n";
		}
	}
}
