#! perl -w
use strict;
# $Id: scribeTAG.perl,v 1.1 2012/05/04 02:34:23 nmendels Exp $
# Generate minutes in HTML from a text IRC/chat Log.
#
# Author: David Booth
# Small modification by Noah Mendelsohn
# to include usage warning on Drafts
# License: W3C Software License (see PrintSoftwareLicense below)
#
# Take a raw W3C IRC log, clean it up a bit, and put it into HTML
# to create meeting minutes. Reads stdin, writes stdout.
# Input format and required scribing conventions are in the documentation:
# http://dev.w3.org/cvsweb/%7Echeckout%7E/2002/scribe/scribedoc.htm
# It's a good idea to pipe the output through "tidy -c".
# (See http://www.w3.org/People/Raggett/tidy/ .)
#
# CONTRIBUTIONS
# Please make improvements to this program! Check them into CVS (or
# email them to me) and notify me by email. Thanks! -- DBooth
# P.S. Please try to avoid dependencies on anything that the
# user might not have installed. I'd like the code to run on
# pretty much any minimal perl installation.
######################################################################
# FEATURE WISH LIST / BUG LIST:
#
# 00000. Working on pre/postParagraph formatting. See nesting problem
# in file:///home/dbooth/w3c/DEV/2002/scribe/test-data/minimal-mit.htm
# First step is to convert to using $scribeParagraphHTMLTemplate.
#
# 0000. BUG: MakeLinks does not work correctly on URLs that contain & because
# the & has already been escaped into &amp;. Try test-data/22-tag*
#
# 000. Guess template option (such as -mit) from lines like the following.
# Also guess meeting title from channel name or meeting name?
# 19:28:02 RRSAgent has joined &mit
# 19:28:05 Zakim has joined &mit
# 19:28:14 Meeting: MIT Site
# 19:29:06 ted has joined &mit
# 19:29:34 Team_MIT(site)2:30PM has now started
# 19:29:36 +MIT531
# 19:29:44 zakim, mit531 has Alan, Simon, Ralph
# 19:29:44 +Alan, Simon, Ralph; got it
# 19:30:23 Alan has joined &mit
# 19:31:10 Liam has joined &mit
# Also define $defaultMeetingTitle from zakim conference name.
#
# 00. Fix formatting processing to prevent generating invalid HTML.
# This would also be a good step toward processing one line at a time,
# and toward making the formatting be fully template-based.
# test-data/validHTML.txt provides a simple input test case.
#
# 0. Embed the CSS, so that it doesn't take so long to load the page.
#
# 0. BUG: URLs written in angle brackets (like <http://example.org/>) are
# formatted as IRC statements.
# See the text pasted inside [[ ... ]] at
# http://www.w3.org/2004/11/04-ws-desc-minutes.htm#item06
# The relevant code below may be around line 3581.
#
# 0. Add warning if "Chair: " appears more than once, because it
# is likely to be a chair statement rather than a command.
#
# 0. Recognize ACTIONS that have a date in front like:
# 20050105 ACTION Steve: Report to w3m
# See http://www.w3.org/2005/03/23-w3m
#
# 0.1 Summarize RESOLUTIONS at the beginning or end. (Also move summary
# of action items to the beginning?)
#
# 1.2. Add a "Subtopic: ..." command?
#
# 2. Add a warning if a command word appears at the beginning of a line
# but is not followed by a colon. Ditto for action status word followed
# by any other words (except an action command) on the same line.
# The easiest way to do this may be to have ParseLine return an extra
# value that is a warning string that the caller can issue. (ParseLine
# should not issue the warning directly, because it is called multiple
# times on the same input text during lookahead.)
#
# 3. Handle weird chars in nick name:
# See http://cvs.w3.org/Team/~checkout~/WWW/2003/11/21-ia-irc.txt?rev=1.139&content-type=text/plain
#
# 4. Improve the guess of who attended, when zakim did not report
# "attendees were ....". Pick them up from zakim's lines like:
# zakim, who is here?
# On the phone I see Mike_Champion, Hugo, Dbooth, Suresh
# + +1.978.235.aaaa
# Zakim, aaaa is Yin-Leng
# +Yin-Leng; got it
# +??P3
# +S_Kumar
# +Katia_Sycara
# +Abbie
# +Sinisa
# +MIT308
# +Sandro
# zakim, mit308 has DBooth, Ralph
# +DBooth, Ralph; got it
# zakim, Steve just arrived in mit308
# +Steve; got it
# (Examples are from http://www.w3.org/2003/12/11-ws-arch-irc.txt
# and http://www.w3.org/2003/12/09-mit-irc.txt )
# (Also remember to watch out for zakim's continuation lines.)
#
# 4.1 Make a default Topic, same as Meeting title, if there aren't any.
# I think the only reason to do this is to prevent invalid HTML, as
# a result of having an empty HTML list (e.g. <ul></ul>) in the table of
# contents.
#
# 5. Add a -keepUrls option to retain IRC lines that were not written
# by the scribe even when the -scribeOnly option is used.
#
# 6. Recognize [[ ...lines... ]] and treat them as a block by
# allowing them to be continuation lines for the same speaker,
# because they are probably pasted in.
#
# 7. Get $actionTemplate and $preSpeakerHTML, etc. from the HTML template,
# so that all formatting info is in the template.
#
# 8. Restructure the code to go through a big loop, processing one line
# at a time, with look-ahead to join continuation lines.
#
# 9. Delete extra stopList from GetNames. (There is already a global one.)
#
# 10. Integration between scribe.perl and mit-2 minutes extractor:
# I thought further about this and at present I don't know of an easy
# enough way to make it worthwhile. One issue: the 2-minutes extractor
# requires Team-only access, so I don't know how scribe.perl would supply
# the user name and password.
#
######################################################################
# DESIGN PHILOSOPHY
# 0. Easy to use. It should usually do the right thing, out of the box,
# without any instructions. And it should provide guidance (helpful
# error messages) when it fails.
# 1. No installation required. Please don't add anything that depends
# on a perl module or other software that the user must have (other
# than a standard perl distribution itself).
# 2. Not limited to W3C use. The program should be usable even for
# non-W3C meetings, by people who are not using RRSAgent, zakim or even IRC.
# (But it's fine to provide enhanced capability when W3C tools are used.)
#
######################################################################
#
# WARNING: The code is a horrible mess. (Sorry!) Please hold your nose if
# you look at it. If you have something better, or make improvements
# to this (and please do!), please let me know. Perhaps it's a good
# example of Fred Brooks's advice in The Mythical Man-Month: "Plan to throw
# one away".
#
######################################################################
#### The diagnostics buffer MUST be set up first, ahead of any code that
#### might call &Warn().
my $diagnostics = "";    # Accumulated diagnostic output.

# Dotted revision number captured from the CVS-maintained Revision keyword
# (left undef if the keyword text contains no such number).
my ($CVS_VERSION) = (q$Revision: 1.1 $ =~ m/(\d+[\d\.]*\.\d+)/);

# Startup banner: program revision/date plus where to look for updates.
my $versionMessage = join("\n",
    'This is scribe.perl $Revision: 1.1 $ of $Date: 2012/05/04 02:34:23 $',
    'Check for newer version at http://dev.w3.org/cvsweb/~checkout~/2002/scribe/',
    '',    # empty last element supplies the banner's final newline
);
# Drop every '$' so that CVS does not re-expand the keywords once the banner
# text ends up inside generated minutes.
$versionMessage =~ tr/\$//d;
&Warn($versionMessage);

##### Formatting:
# NOTE(review): all four strings are empty in this copy of the file;
# presumably they are emitted around speaker and writer names in the
# generated HTML -- confirm at the use sites.
my ($preSpeakerHTML, $postSpeakerHTML) = ("", "");
my ($preWriterHTML,  $postWriterHTML)  = ("", "");
# NOTE(review): the literal assigned to $prePhoneParagraphHTML below contains
# the "- DRAFT -" banner and the draft disclaimer, while the statements that
# follow operate on $draftWarningHTML.  It looks as if markup and some
# intervening code were lost from this copy -- confirm against the upstream
# file before relying on either variable.
# NOTE(review): "This is version has not been approved" reads like a typo for
# "This version has not been approved"; it is runtime text and is left as is.
my $prePhoneParagraphHTML = "

- DRAFT -

\n

This is version has not been approved as a true record of the TAG's meeting and there is some risk that individual TAG members have been misquoted. This transcript should typically not be quoted, except as necessary to arrange for correction and approval.

";
# When $draft is false, blank out the warning so the placeholder is replaced
# by nothing.  ($draft and $draftWarningHTML are declared outside this view.)
$draftWarningHTML = '' if !$draft;
# Replace every SV_DRAFT_WARNING placeholder in $result with the warning text.
# s///g yields false when nothing was replaced, in which case a warning is
# issued telling the user the template has no such placeholder.
($result =~ s/SV_DRAFT_WARNING/$draftWarningHTML/g) || &Warn("\nWARNING: SV_DRAFT_WARNING not found in template.\nYou can ignore this warning if your minutes template does not\nneed a '- DRAFT -' warning.\n\n");
#### Output seems to be normally valid now.
# &Warn("\nWARNING: There is currently a bug that causes this program to\ngenerate INVALID HTML! You can correct it by piping the output \nthrough \"tidy -c\". If you have tidy installed, you can use \nthe -tidy option to do so. Otherwise, run the W3C validator to find \nand fix the error: http://validator.w3.org/\n\n");
# Embed diagnostics in the generated minutes?
my $diagnosticsHTML = "