#!/usr/bin/perl =head1 NAME gen-topics - create files used in frames from a main file =head1 SYNOPSIS ./gen-topics.plx input.html output-prefix =head1 DESCRIPTION This program will scan an HTML file (in a very particular format) and generate the auxilary documents used in a simple frameset: +------------------------------------------------------------+ | top topic selections (frame "topics") | +------------------------------------------------------------+ | | | | per-topic | main document | | outline | (frame "main") | | (frame "subtopics") | | | | | +------------------------------------------------------------+ It also inserts a small amount of javascript to try to keep the content synchronized. =head2 Arguments The first argument is the name of the input HTML file. The second argument is a prefix used for all generated HTML files. =head2 Input The input should be a single HTML file, with the following conventions: =over 4 =item * H2 headers introduce new topics =item * H3 or lower headers introduced nested tables of contents =item * All headers must start with at least a single named anchor. e.g.,

Worldwide Data Distribution System: Architect / Implementor

=back Currently, I chop off all the H3-or-lower text at the first colon. =head1 Output There are three types of files created: =over 4 =item 1. The main frameset: $prefix-frameset.html =item 2. The top topics bar: $prefix-topics.html Every topic will be included in a file "$prefix-topics.html"; this is intended to be loaded into the "topics" frame. It has a link to each topic file (described below) as well as a "no frames" option. =item 3. The subtopics page on the left: $prefix-topic-$name.html For every topic found in the input file, we generate a file called "$prefix-topic-$name.html". The $label is the C value from the H2 header that started that topic. It is intended to be loaded into the "subtopics" frame when the user clicks the corresponding link in the "topics" frame. =back =cut # ====================================================================== # modules, etc # pragmata use warnings; use strict; # standard modules use File::Copy qw( move ); # constants use constant DEBUG_SCAN => 0x01; use constant DEBUG_FILE_OPS => 0x02; my $DEBUG = 0; # ====================================================================== # subroutines sub backup { my ( $file ) = @_; if ( $DEBUG & DEBUG_FILE_OPS ) { print STDERR "backing up '$file'\n"; } return unless -e $file; my $backup = $file; my $i = 0; while ( -e $backup ) { $backup = $file . '.' . $i++; } move $file, $backup or die "move '$file' to '$backup' failed: $!"; } # ====================================================================== # option processing unless ( @ARGV == 2 ) { die "usage: $0 HTML_IN PREFIX"; } my ( $HTML_IN, $PREFIX ) = @ARGV; # ====================================================================== # main processing # ---------------------------------------------------------------------- # input file scanning open my $html_in, '<', $HTML_IN or die "opening $HTML_IN for read: $!"; # each topic is represented by a hash ref with the following keys: # # text - human-readable version of the text # name - the value of the "name" parameter on the text anchor # level - the header number this topic is at # my @topics; my $main_title_text; my $problems = 0; while ( my $line = <$html_in> ) { $line =~ s!\s+\z!!; if ( $line =~ m!(.*)!i ) { $main_title_text = $1; } my ( $level ) = ( $line =~ m!!i ) or next; until ( $line =~ m!! ) { $line .= <$html_in>; $line =~ s!\s+\z!!; } # skip

tags next if $level < 2; my ( $name ) = ( $line =~ m!(.*)!$2!i; my ( $text ) = ( $line =~ m!(.*)!i ); unless ( $text ) { warn "$HTML_IN:$.: could not find text: '$line'"; ++$problems; } # need to strip out any anchors in $text $text =~ s!(.*?)!$1!ig; if ( $level >= 3 ) { $text =~ s!:.*!!; } push @topics, { text => $text, name => $name, level => $level }; } if ( $DEBUG & DEBUG_SCAN ) { foreach my $topic ( @topics ) { print " " x $topic->{level}, "'$topic->{name}' => '$topic->{text}'\n"; } } if ( $problems ) { die "$HTML_IN: $problems problems found, not generating any output"; } # ---------------------------------------------------------------------- # output individual topic files # keep a list of topic files generated: filename and text at least my @topic_files; my $topic_fh; my $last_level = 2; foreach my $topic ( @topics ) { if ( $topic->{level} == 2 ) { my $topic_file = "$PREFIX-topic-$topic->{name}.html"; backup $topic_file; if ( $topic_fh ) { while ( $last_level > 2 ) { print $topic_fh " \n"; --$last_level; } print $topic_fh < HTML close $topic_fh; } { open my $fh, '>', $topic_file or die "opening '$topic_file' for write: $!"; $topic_fh = $fh; } push @topic_files, { file => $topic_file, text => $topic->{text} }; my $href = "$HTML_IN#$topic->{name}"; print $topic_fh < $topic->{name}
    HTML } my $level_diff = $topic->{level} - $last_level; $last_level = $topic->{level}; if ( abs( $level_diff ) > 1 ) { die "level_diff = $level_diff, bailing"; } elsif ( $level_diff == 1 ) { print $topic_fh "
      \n"; } elsif ( $level_diff == -1 ) { print $topic_fh "
    \n"; } print $topic_fh ( "
  • {name}\" target=\"main\">". "$topic->{text}
  • \n" ); } if ( $topic_fh ) { while ( $last_level > 2 ) { print $topic_fh "
\n"; --$last_level; } print $topic_fh < HTML close $topic_fh; } # ---------------------------------------------------------------------- # output topic selection file my $topics_file = "$PREFIX-topics.html"; backup $topics_file; open my $topics_fh, ">", $topics_file or die "opening '$topics_file' for write: $!"; print $topics_fh < $HTML_IN topics

HTML my $later = 0; foreach my $topic_file ( @topic_files ) { print $topics_fh ' ', ( $later++ ? ' | ' : '' ), qq|$topic_file->{text}\n|; } print $topics_fh ' ', ( $later++ ? ' | ' : '' ), qq|No Frames\n|; print $topics_fh < HTML close $topics_fh or warn "closing '$topics_file' after write: $!"; # ---------------------------------------------------------------------- # output frameset my $frameset_file = "$PREFIX-frameset.html"; backup $frameset_file; open my $frameset_fh, '>', $frameset_file or die "opening '$frameset_file' for write: $!"; print $frameset_fh < $main_title_text HTML close $frameset_fh or warn "closing '$frameset_file' after write: $!"; exit 0;