#!/usr/bin/perl
=head1 NAME
gen-topics - create files used in frames from a main file
=head1 SYNOPSIS
./gen-topics.plx input.html output-prefix
=head1 DESCRIPTION
This program will scan an HTML file (in a very particular format) and
generate the auxilary documents used in a simple frameset:
+------------------------------------------------------------+
| top topic selections (frame "topics") |
+------------------------------------------------------------+
| | |
| per-topic | main document |
| outline | (frame "main") |
| (frame "subtopics") | |
| | |
+------------------------------------------------------------+
It also inserts a small amount of javascript to try to keep the
content synchronized.
=head2 Arguments
The first argument is the name of the input HTML file. The second
argument is a prefix used for all generated HTML files.
=head2 Input
The input should be a single HTML file, with the following
conventions:
=over 4
=item *
H2 headers introduce new topics
=item *
H3 or lower headers introduced nested tables of contents
=item *
All headers must start with at least a single named anchor. e.g.,
=back
Currently, I chop off all the H3-or-lower text at the first colon.
=head1 Output
There are three types of files created:
=over 4
=item 1.
The main frameset: $prefix-frameset.html
=item 2.
The top topics bar: $prefix-topics.html
Every topic will be included in a file "$prefix-topics.html"; this is
intended to be loaded into the "topics" frame. It has a link to each
topic file (described below) as well as a "no frames" option.
=item 3.
The subtopics page on the left: $prefix-topic-$name.html
For every topic found in the input file, we generate a file called
"$prefix-topic-$name.html". The $label is the C value from the
H2 header that started that topic. It is intended to be loaded into
the "subtopics" frame when the user clicks the corresponding link in
the "topics" frame.
=back
=cut
# ======================================================================
# modules, etc
# pragmata
use warnings;
use strict;
# standard modules
use File::Copy qw( move );
# constants
use constant DEBUG_SCAN => 0x01;
use constant DEBUG_FILE_OPS => 0x02;
my $DEBUG = 0;
# ======================================================================
# subroutines
sub backup
{
my ( $file ) = @_;
if ( $DEBUG & DEBUG_FILE_OPS )
{
print STDERR "backing up '$file'\n";
}
return unless -e $file;
my $backup = $file;
my $i = 0;
while ( -e $backup )
{
$backup = $file . '.' . $i++;
}
move $file, $backup
or die "move '$file' to '$backup' failed: $!";
}
# ======================================================================
# option processing
unless ( @ARGV == 2 )
{
die "usage: $0 HTML_IN PREFIX";
}
my ( $HTML_IN, $PREFIX ) = @ARGV;
# ======================================================================
# main processing
# ----------------------------------------------------------------------
# input file scanning
open my $html_in, '<', $HTML_IN
or die "opening $HTML_IN for read: $!";
# each topic is represented by a hash ref with the following keys:
#
# text - human-readable version of the text
# name - the value of the "name" parameter on the text anchor
# level - the header number this topic is at
#
my @topics;
my $main_title_text;
my $problems = 0;
while ( my $line = <$html_in> )
{
$line =~ s!\s+\z!!;
if ( $line =~ m!(.*)!i )
{
$main_title_text = $1;
}
my ( $level ) = ( $line =~ m!!i )
or next;
until ( $line =~ m!! )
{
$line .= <$html_in>;
$line =~ s!\s+\z!!;
}
# skip tags
next if $level < 2;
my ( $name ) = ( $line =~ m!(.*)!$2!i;
my ( $text ) = ( $line =~ m!(.*)!i );
unless ( $text )
{
warn "$HTML_IN:$.: could not find text: '$line'";
++$problems;
}
# need to strip out any anchors in $text
$text =~ s!(.*?)!$1!ig;
if ( $level >= 3 )
{
$text =~ s!:.*!!;
}
push @topics, { text => $text, name => $name, level => $level };
}
if ( $DEBUG & DEBUG_SCAN )
{
foreach my $topic ( @topics )
{
print " " x $topic->{level}, "'$topic->{name}' => '$topic->{text}'\n";
}
}
if ( $problems )
{
die "$HTML_IN: $problems problems found, not generating any output";
}
# ----------------------------------------------------------------------
# output individual topic files
# keep a list of topic files generated: filename and text at least
my @topic_files;
my $topic_fh;
my $last_level = 2;
foreach my $topic ( @topics )
{
if ( $topic->{level} == 2 )
{
my $topic_file = "$PREFIX-topic-$topic->{name}.html";
backup $topic_file;
if ( $topic_fh )
{
while ( $last_level > 2 )
{
print $topic_fh " \n";
--$last_level;
}
print $topic_fh <