#!/usr/bin/perl -w
# rfcindex,v 1.7 2000/02/20 16:11:54 njh Exp
=head1 NAME
B<rfcindex> - add HTML markup to an rfc-index.txt file
=head1 SYNOPSIS
B<rfcindex> [B<--base>=base-URL] [options] [F<file> F<...>]
=head1 README
Online RFC repositories typically contain a text file produced by the
RFC Editor, F<rfc-index.txt>, which lists the RFCs currently in
existence. rfcindex is a Perl script that reads the plain text index
file and outputs an HTML index file. The RFC number of each citation
becomes a hyperlink to the text of that RFC (if an online version of
the RFC exists), and the cross references between citations
(obsoletes, obsoleted by, updates, updated by) become hyperlinks
within the HTML index.
=head1 OPTIONS
=over 5
=item B<--base>
Hyperlinks to RFC texts will be relative to the base URL specified.
=item B<--notable>
Select alternative markup which avoids table tags and produces a file
which can be rendered in an incremental fashion. By default the HTML
markup applied to the index uses a table, which can result in HTML
which is quite slow for some browsers to layout (particularly if the
file is being accessed over a network).
=item B<--nodate>
Suppress the date line which is normally added to the HTML output.
=item B<--nocredit>
Suppress the hyperlink to the home page of this program, which is
normally added to the HTML output.
=item B<--version> or B<--help>
Prints version information, copyright, and a pointer towards the
documentation, then exits.
=back
=head1 EXAMPLES
To generate an HTML index for a locally held mirror of the RFC archive:
rfcindex rfc-index.txt >index.html
To generate a locally held index to a remote RFC repository:
lynx -source http://www.example.net/rfc/rfc-index.txt \
| rfcindex --base http://www.example.net/rfc/ >rfc-index.html
(of course, this example assumes that you have the B<lynx> browser
available - if not then download a copy of the rfc-index.txt file from
the remote repository some other way and work on that).
=head1 BUGS
As part of the markup process, the title string in each citation is
emphasised. The regular expressions used to determine where the title
ends and the author list begins within the citation appear to produce
correct results for all RFCs which were listed at the time of writing,
but are not necessarily robust against all possible future title /
author combinations.
Comments, suggestions for improvement and bug reports are always
welcome (see email address below).
=head1 LICENSE
Copyright (C) 1999, 2000 Neil Hoggarth
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
The GNU General Public License is available from:
http://www.gnu.org/copyleft/gpl.html
New versions of this script, and my other free software, will
be made available from:
http://www.kernighan.demon.co.uk/software/
=head1 PREREQUISITES
This script requires the C<strict> and C<Getopt::Long> modules.
=head1 SCRIPT CATEGORIES
Web
=cut
use strict;
use Getopt::Long;
# Script version, reported by the --version / --help banner.
my $VERSION=1.2;

# Markup fragments; assigned further down once the output style
# (table or table-free) is known.
my ($mk_index_start, $mk_index_end, $mk_citation_start, $mk_title_start);

# Option defaults: no base URL, table markup, date line and credit
# link both shown.
my $base       = "";
my $table      = 1;
my $showdate   = 1;
my $showcredit = 1;

# Set by --version and --help respectively; undefined unless requested.
my ($getversion, $gethelp);

# GetOptions reports unknown or malformed options on STDERR by itself
# and returns false. Stop at that point rather than going on to emit
# an HTML document built from a half-understood command line.
GetOptions("table!"  => \$table,
           "date!"   => \$showdate,
           "credit!" => \$showcredit,
           "base=s"  => \$base,
           "version" => \$getversion,
           "help"    => \$gethelp)
    or die "rfcindex: invalid command line; " .
           "run 'perldoc rfcindex' for usage information.\n";

# If a base URL has been provided and it isn't slash-terminated then
# make it so. The test is on string emptiness rather than truth, so
# that a base of "0" (false in Perl) is still treated as a base.
if ($base ne "" && $base !~ m{/\z}) {
    $base .= "/";
}
# Choose the four markup fragments for the selected output style.
# $table defaults to 1 and is bound to the negatable "table!" option,
# so --notable selects the else branch.
# NOTE(review): apart from the ": " title separator in the else branch,
# every fragment below contains nothing but two newlines. The HTML tags
# they presumably carried appear to have been stripped from this copy --
# confirm against the upstream script before relying on these values.
if ($table) {
# Table-style markup (the default).
# Fragment that opens the index -- presumably emitted once, before the
# first citation; TODO confirm against the code that prints it.
$mk_index_start="

";
# Fragment that closes the index.
$mk_index_end="

";
# Fragment that begins each citation.
$mk_citation_start="

";
# Fragment placed before each citation's title.
$mk_title_start="

";
} else {
# Table-free markup, the alternative the POD describes as renderable
# in an incremental fashion.
$mk_index_start="

";
$mk_index_end="

";
$mk_citation_start="

";
# Here the title is introduced by a plain ": " separator.
$mk_title_start=": ";
}
if ($getversion || $gethelp) {
print "\nrfcindex, version $VERSION " .
'(2000/02/20 16:11:54)' . "\n\n" .
"Copyright (C) 1999, 2000 Neil Hoggarth . " .
"Run\n'perldoc rfcindex' for usage and licensing information.\n\n";
} else {
# A previous release of this script put a <base href="..."> tag in
# the <head> part of the output, in order to make the external RFC
# document references relative to any URL requested using the
# --base option. Unfortunately Netscape 4 appears to apply the
# base URL to the internal, in-document cross-references; I think
# that this is a Netscape bug (other browsers that I have tried do
# "the right thing") but Netscape is sufficiently prevalent that
# we ought to work around it. In this version of the script I have
# dropped the <base> tag and the base URL is manually hacked into
# the external references in the body, as and when they are made.
print "\n";
print "\n";
print "RFC Index\n";
print "\n";
print "\n";
print "

RFC Index

\n";
# Label the output with the current date, ISO 8601 style. I'm not
# going to worry over much about hours, minutes, seconds,
# timezones, etc or the date stamp on the source file. The
# objective is just to indicate to the reader whether they are
# looking at something reasonably current or not.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
$year += 1900; $mon++;
print "