#!/usr/bin/perl -w
#
# written by <tkil@scyre.com> 2000-01-20
#
# this is free software; you may use, modify, and distribute this
# program under the same terms as perl itself.

use strict;

package FQParser;

use HTML::Parser;
use URI::URL;

@FQParser::ISA = qw(HTML::Parser);

# takes one optional argument, a base url.  typically this will be the
# url you fetched the contents from.

sub new
{
    my $class = shift;
    my $self = HTML::Parser->new();

    bless $self, $class;

    # what is the base URL we should try to absolutize to?
    $self->{_fqp_base} = shift;

    # an accumulator for the output
    $self->{_fqp_out} = '';

    # and a flag to tell us whether or not we're in the <HEAD></HEAD> block
    $self->{_fqp_in_head} = 0;

    return $self;
}

# the "start" method is the only one that needs brains.  and the "end"
# method needs a small brain, since we need to track whether or not
# we're in the <HEAD> section.

sub start
{
    my ($self, $tag, $attr, $attrseq, $orig_text) = @_;

    if ($tag eq 'head')
    {
        $self->{_fqp_in_head} = 1;
    }
    elsif ($self->{_fqp_in_head} && $tag eq 'base')
    {
        $self->{_fqp_base} = $attr->{href};
    }
    elsif (exists $attr->{href} || exists $attr->{src})
    {
        # need to munge it.
        for my $link_attr (qw/href src/)
        {
            next unless exists $attr->{$link_attr};
            my $orig_url = $attr->{$link_attr};
            my $new_url = URI::URL->new($attr->{$link_attr},
                                        $self->{_fqp_base})->abs();

            # normally i'd try to do this more gracefully, but this
            # should do the trick.  eeek.
            if ($orig_url ne $new_url)
            {
                $orig_text =~ s{($link_attr)\s*=\s*([\"\']?)$orig_url\2}
                               {$1="$new_url"}i;
            }
        }
    }

    $self->{_fqp_out} .= $orig_text;
}

sub end
{
    my ($self, $tag, $orig_text) = @_;

    if ($tag eq 'head')
    {
        $self->{_fqp_in_head} = 0;
    }

    $self->{_fqp_out} .= $orig_text;
}

# the rest of the overrides just copy stuff along

sub declaration
{
    my ($self, $decl) = @_;
    $self->{_fqp_out} .= "<!" . $decl . ">";
}

sub text
{
    my ($self, $text) = @_;
    $self->{_fqp_out} .= $text;
}

sub comment
{
    my ($self, $comment) = @_;
    $self->{_fqp_out} .= $comment;
}

# and finally, to get the results...

sub get_doc
{
    my $self = shift;
    return $self->{_fqp_out};
}

#############################################################################
#############################################################################

package main;

use LWP::Simple;

for my $url (@ARGV)
{
    my $fqp = FQParser->new($url);
    my $contents = get($url);
    $fqp->parse($contents);
    print $fqp->get_doc();
}

exit 0;