apg · March 20, 2017 22:53
diff --git a/man2html.awk b/man2html.awk
 ### ====================================================================
 ###  @Awk-file{
 ###     author          = "Nelson H. F. Beebe",
 ###     version         = "1.06",
 ###     date            = "24 October 1997",
 ###     time            = "21:34:34 MDT",
 ###     filename        = "man2html.awk",
 ###     address         = "Center for Scientific Computing
 ###                        University of Utah
 ###                        Department of Mathematics, 105 JWB
 ###                        155 S 1400 E RM 233
 ###                        Salt Lake City, UT 84112-0090
 ###                        USA",
 ###     telephone       = "+1 801 581 5254",
 ###     FAX             = "+1 801 581 4148",
 ###     URL             = "http://www.math.utah.edu/~beebe",
 ###     checksum        = "01400 968 2975 23193",
 ###     email           = "beebe@math.utah.edu (Internet)",
 ###     codetable       = "ISO/ASCII",
 ###     keywords        = "nroff, troff, UNIX manual page",
 ###     supported       = "yes",
 ###     docstring       = "This program converts UNIX manual pages
 ###                        in nroff/troff markup to strictly-conformant
 ###                        HTML 2.0, 3.0, or 3.2.  [Actually, only two
 ###                        HTML 3.x entities (`&nbsp;' and `&shy;')
 ###                        are used, and those rarely; otherwise, the
 ###                        syntax conforms strictly to HTML 2.0.]
 ###
 ###                        Usage:
 ###                        	nawk -f man2html.awk [HTML=2|3|3.2] \
 ###                                manpage-file >html-file
 ###
 ###                        The single option, HTML=2, HTML=3, or
 ###                        HTML=3.2, selects the HTML grammar level.
 ###                        The default is HTML=2.
 ###
 ###                        This program is normally run via a shell
 ###                        wrapper that offers an option for setting the
 ###                        output file name.  It has been used to
 ###                        successfully convert entire man-page
 ###                        collections on several UNIX systems to HTML
 ###                        form for convenient World-Wide Web browser
 ###                        access.
 ###
 ###                        Of those nroff/troff commands defined in the
 ###                        -man format used for UNIX manual pages, only
 ###                        the most commonly-used ones are supported;
 ###                        unrecognized ones will be warned about, and
 ###                        preserved as HTML comments in the output.
 ###
 ###                        UNIX man pages tend to be written in a
 ###                        highly-stylized fashion that we apply
 ###                        heuristics to in order to recover high-level
 ###                        HTML structure from low-level nroff/troff
 ###                        markup.  Deviations from conventional
 ###                        man-page writing practice will likely result
 ###                        in less-than-perfect translation to HTML.
 ###
 ###                        Although there are several other `man2html'
 ###                        translators available on the Internet, this
 ###                        one is entirely of my own authorship, with no
 ###                        code borrowing from anywhere else.
 ###
 ###                        The checksum field above contains a CRC-16
 ###                        checksum as the first value, followed by the
 ###                        equivalent of the standard UNIX wc (word
 ###                        count) utility output of lines, words, and
 ###                        characters.  This is produced by Robert
 ###                        Solovay's checksum utility.",
 ###  }
 ### ====================================================================

 # Build the lookup tables and emit the HTML prologue before reading input.
 BEGIN { initialize() }

 # ---- conditionals and string definitions ----------------------------

 # ".ie t .ds ..." is the troff branch of an if-else: read past it and
 # fall through, so that the ".el .ds ..." line expected next is handled
 # by the following rule.
 /^[.]ie +t +[.]ds/ { getline }

 # nroff branch of the if-else.  The dot of ".ds" is matched literally; a
 # bare "." in the pattern would accept any character in that position.
 /^[.]el +[.]ds/ { define($3); next }

 /^[.']\\"/ { cmd_comment($0); next }	# keep troff comments as HTML comments

 /^[.]if +n *\\\{/ { cmd_comment($0); next }	# nroff block opener: body is processed normally

 /^[.]if +t *\\\{/ { cmd_comment_block($0); next }	# troff-only block: whole block becomes comments

 /^ *\\\}/ { cmd_comment($0); next }	# block closer

 /^[.]if +t/ { cmd_comment($0); next }	# single-line troff directive -> comment

 /^[.]if +n +[.]ds/ { define($4); next }

 /^[.]if +n +[.]ti/ { cmd_comment($0); next }	# nroff spacing directive -> comment

 /^[.]ie +n +[.]ds/ { define($4); next }

 # ---- -man macros: one handler function per request ------------------

 /^[.]SH/ { cmd_SH(); next }

 /^[.]SS/ { cmd_SS(); next }

 /^[.]TH/ { cmd_TH(); next }

 /^[.]B / { cmd_B(); next }

 /^[.]I / { cmd_I(); next }

 /^[.]IX / { cmd_IX(); next }

 /^[.]R / { cmd_R(); next }

 /^[.]ad/ { cmd_ad(); next }

 /^[.][BIR]$/ { cmd_BIR(); next }	# font request with no argument

 /^[.]BI / { cmd_XY("B","I"); next }

 /^[.]br/ { cmd_br(); next }

 /^[.]BR / { cmd_XY("B","R"); next }

 /^[.]ce[ 0-9]*$/ { cmd_ce(); next }

 /^[.]hw/ { cmd_hw(); next }

 /^[.]IB / { cmd_XY("I","B"); next }

 /^[.]IR / { cmd_XY("I","R"); next }

 /^[.]ne/ { cmd_ne(); next }

 /^[.]RB / { cmd_XY("R","B"); next }

 /^[.]RI / { cmd_XY("R","I"); next }

 /^[.]nf/ { cmd_nf(); next }

 /^[.]fi/ { cmd_fi(); next }

 /^[.]IP/ { cmd_IP(); next }

 /^[.]LP/ { cmd_LP(); next }

 /^[.]na/ { cmd_na(); next }

 /^[.]PP/ { cmd_PP(); next }

 /^[.]RE/ { cmd_RE(); next }

 /^[.]RS/ { cmd_RS(); next }

 /^[.]sp/ { cmd_sp(); next }

 /^[.]TP/ { cmd_TP(); next }

 /^[.]TS/ { cmd_TS(); next }

 # Any other request is reported and preserved as an HTML comment.
 /^[.][A-Za-z]/ { cmd_unknown(); next }

 # Everything else is running text.
 { print_line(strtohtml($0)) }

 # Merge the table of contents with the body and write the final page.
 END { terminate(); }


 # The anchor() function is adapted from my bibtex-to-html.awk file

 function anchor(s,type,pattern,offset,prefix,save_label, name,rstart,rlength)
 {
    # Wrap every match of pattern in s with <A type="prefix+match">...</A>
    # and return the resulting string.
    #
    #   s           text to scan
    #   type        anchor attribute name ("HREF" or "NAME"); "NAME"
    #               anchors additionally get <STRONG> around the text
    #   pattern     regular expression locating the text to anchor
    #   offset      number of leading characters of each match that are
    #               context only and stay outside the anchor
    #   prefix      text placed before the match inside the attribute
    #               value (e.g. "mailto:")
    #   save_label  not used here; passed through unchanged on recursion
    #
    # The remainder of the string after each match is handled by a
    # recursive call.

    # An empty match would never consume input, so stop rather than
    # recurse without end.
    if (!match(s,pattern) || (RLENGTH < 1))
        return (s)

    # Private copies are needed: the recursive call below overwrites the
    # RSTART and RLENGTH globals.
    rstart = RSTART + offset	# skip the leading context ...
    rlength = RLENGTH - offset	# ... and shorten the match accordingly

    name = substr(s,rstart,rlength)
    sub(/ +at +/,"@",name)	# reduce "user at host" to "user@host"

    return (substr(s,1,rstart-1) \
        "<A " type "=\"" prefix name "\">" \
        ((type == "NAME") ? "<STRONG>" : "") \
        substr(s,rstart,rlength) \
        ((type == "NAME") ? "</STRONG>" : "") \
        "</A>" \
        anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label))
 }


 function begin_toc( i,n,part)
 {
    # Open the table of contents: its heading, then the outermost item
    # list.  No item is open yet, so In_TOC_Item is cleared.
    n = split("<H1>|Table of contents|</H1>|<UL>",part,"|")
    for (i = 1; i <= n; i++)
        print_toc(part[i])
    In_TOC_Item = 0
 }


 function cmd_ad()
 {
    # .ad turns on adjustment (flush-left-and-right justification).
    # There is no HTML equivalent, so the request is kept as a comment.
    cmd_comment($0)
 }


 function cmd_B( s,k)
 {
    # .B text ...: emit the arguments of a bold request as <STRONG> text.
    #
    # A first argument that opens with a double quote is taken to run to
    # the end of the line (trailing quotes and blanks are trimmed).
    # Otherwise all whitespace-separated arguments are joined with single
    # blanks, so that ".B foo bar" yields "foo bar" rather than just
    # the first word.
    end_font()
    if (match($0,/^[.]B *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $2
        for (k = 3; k <= NF; ++k)
            s = s " " $k
    }
    print_line("<STRONG>" strtohtml(s) "</STRONG>")
 }


 function cmd_BIR( letter)
 {
    # A bare .B, .I or .R request (no argument) is remapped to the inline
    # font escape \fB, \fI or \fR and sent through strtohtml().
    end_font()
    letter = substr($0,2,1)
    print_line(strtohtml("\\f" letter))
 }


 function cmd_br()
 {
    # .br (line break) is handled exactly like a paragraph request.
    cmd_PP()
 }


 function cmd_ce( k,n)
 {
    # .ce nnn: centre the next nnn input lines (nnn = 0 turns centring
    # off).  A request without a count centres one line, which is the
    # troff default.
    #
    # The request itself is kept as a comment.  Each centred line is
    # followed by <BR>; for HTML 3.2 the lines are also wrapped in
    # <CENTER> ... </CENTER>, the HTML 3.2 shorthand for
    # <DIV ALIGN=CENTER> ... </DIV>.  (At the time of writing, amaya was
    # the only browser supporting DIV; grail, hotjava and netscape
    # recognised CENTER; arena, chimera, lynx and xmosaic recognised
    # neither.)

    n = (NF >= 2) ? $2 + 0 : 1
    cmd_comment($0)
    if (n <= 0)
        return

    if (HTML == "3.2")
        print_line("<CENTER>")
    # Stop early at end of input instead of re-emitting the last line.
    for (k = 1; (k <= n) && ((getline) > 0); ++k)
        print_line(strtohtml($0) "<BR>")
    if (HTML == "3.2")
        print_line("</CENTER>")
 }


 function cmd_comment(s)
 {
    # Emit s as an HTML comment.  The troff comment prefix is removed
    # first because it confuses html-pretty.  In_Comment is raised only
    # while strtohtml() runs, so that it applies its comment-specific
    # handling, and is cleared again afterwards.
    sub(/^[.']\\"/,"",s)
    In_Comment = 1
    s = strtohtml(s)
    In_Comment = 0
    print_line("<!-- " s " -->")
 }


 function cmd_comment_block(s)
 {
    # Turn a multi-line conditional block into HTML comments: the opening
    # line s first, then every following input line up to and including
    # the line holding the closing \} (or up to end of input).
    cmd_comment(s)

    In_Comment = 1
    for (;;)
    {
        if ((getline s) <= 0)
            break		# input exhausted
        cmd_comment(s)
        if (s ~ /^ *\\\}/)
            break		# found end of block
    }
    In_Comment = 0
 }


 function cmd_I( s,k)
 {
    # .I text ...: emit the arguments of an italic request as <EM> text.
    #
    # A first argument that opens with a double quote is taken to run to
    # the end of the line (trailing quotes and blanks are trimmed).
    # Otherwise all whitespace-separated arguments are joined with single
    # blanks, so that ".I foo bar" yields "foo bar" rather than just
    # the first word.
    end_font()
    if (match($0,/^[.]I *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $2
        for (k = 3; k <= NF; ++k)
            s = s " " $k
    }
    print_line("<EM>" strtohtml(s) "</EM>")
 }


 function cmd_IX()
 {
    # .IX index entries produce no output.  While the current line ends
    # in a backslash, the continuation line that follows is read and
    # dropped as well.
    for (;;)
    {
        if (!match($0,/\\$/))
            break
        if ((getline) <= 0)
            break
    }
 }


 function cmd_fi()
 {
    # .fi: resume filling.  Closes the <PRE> environment if one is open;
    # otherwise the request is only recorded as a comment.
    end_font()
    if (!In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("</PRE>")
    In_PRE = 0
 }


 function cmd_hw()
 {
    # .hw word-hyph-en-a-tion ex-cep-tions: kept only as a comment.
    cmd_comment($0)
 }


 function cmd_IP()
 {
    # .IP: close any open font markup, count the paragraph, and start a
    # new HTML paragraph.
    end_font()
    PP += 1
    print_line("<P>")
 }


 function cmd_LP()
 {
    # .LP: close any open font markup, count the paragraph, and start a
    # new HTML paragraph.
    end_font()
    PP += 1
    print_line("<P>")
 }


 function cmd_na()
 {
    # .na: no adjust (turn off flush-left-and-right justification,
    # producing ragged-right text).  There is no HTML equivalent, so the
    # request is kept as a comment.
    cmd_comment($0)
 }


 function cmd_ne()
 {
    # .ne dimen: need dimen of vertical space before the end of the page,
    # otherwise force a page break (e.g. to prevent page breaks after
    # headings).  Kept only as a comment.
    cmd_comment($0)
 }


 function cmd_nf()
 {
    # .nf: stop filling.  Opens a <PRE> environment unless one is already
    # open, in which case the request is only recorded as a comment.
    end_font()
    if (In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("<PRE>")
    In_PRE = 1
 }


 function cmd_PP()
 {
    # .PP: start a new paragraph and close the current .TP list level.
    # <P> tags are illegal inside <PRE>...</PRE>, so an empty line is
    # written there instead.
    end_font()
    PP += 1
    print_line(In_PRE ? "" : "<P>")
    end_TP()
 }


 function cmd_R( s,k)
 {
    # .R text ...: emit the arguments of a roman-font request as plain
    # text.
    #
    # A first argument that opens with a double quote is taken to run to
    # the end of the line (trailing quotes and blanks are trimmed).
    # Otherwise all whitespace-separated arguments are joined with single
    # blanks, so that ".R foo bar" yields "foo bar" rather than just
    # the first word.
    end_font()
    if (match($0,/^[.]R *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $2
        for (k = 3; k <= NF; ++k)
            s = s " " $k
    }
    print_line(strtohtml(s))
 }


 function cmd_RE()
 {
    # .RE: end the relative indent started by the matching .RS.
    #
    # Lists opened since that .RS are closed first, then the
    # <BLOCKQUOTE> itself.  A .RE with no open .RS still closes any open
    # lists but no longer writes an unbalanced </BLOCKQUOTE>.
    end_font()
    if (In_PRE)			# should not happen, but some man pages
        cmd_fi()		# are irregular
    while (List_Level > RSE_List_Level[RSE_Level])
        end_TP()
    if (RSE_Level > 0)
    {
        RSE_Level--
        print_line("</BLOCKQUOTE>")
    }
 }


 function cmd_RS()
 {
    # .RS: start a relative indent.  The list level in force is recorded
    # so that cmd_RE() knows how far to unwind, and a fresh list level is
    # opened for any .TP items inside the indented region.
    end_font()
    RSE_Level += 1
    RSE_List_Level[RSE_Level] = List_Level
    List_Level += 1
    print_line("<BLOCKQUOTE>")
 }


 function cmd_SH( s)
 {
    # .SH: a section heading is rendered as an <H1> heading.
    cmd_SH_SS("H1")
 }


 function cmd_SS( s)
 {
    # .SS: a subsection heading is rendered as an <H2> heading.
    cmd_SH_SS("H2")
 }


 function cmd_SH_SS(tag, s)
 {				# [sub]section heading
    # Emit a section (tag == "H1") or subsection (tag == "H2") heading
    # into the body, with a named anchor, and add a matching entry to the
    # table of contents.  The heading text is taken from the current
    # input line.  H1 and H2 count sections and subsections; the anchor
    # label is "HDR.<H1>" or "HDR.<H1>.<H2>".

    if (!TH_seen)		# should not happen, but some man pages are
 	cmd_TH(substr($0,5))	# irregular
    end_font()
    # Close every open indent and list before starting a new section.
    while (RSE_Level > 0)
 	cmd_RE()
    while (List_Level > 0)
 	end_TP()
    if (tag == "H1")
    {
 	H1++
 	if (H1 == 1)
 	    begin_toc()
 	if (H2 > 0)
 	{
 	    # The previous section had subsections: close the last
 	    # subsection item and the nested list.
 	    print_toc("</LI>")
 	    print_toc("</UL>")
 	}
 	H2 = 0
 	if (H1 > 1)
 	    print_line("<HR>")	# a separating horizontal rule is a nice touch
    }
    else if (tag == "H2")
    {
 	H2++
    }
    # Heading text: everything after the request, without enclosing quotes.
    s = substr($0,5)
    sub(/^ *\"/,"",s)
    sub(/\" *$/,"",s)
    s = strtohtml(s)

    SH_SS_count = "." H1
    if (H2 > 0)
 	SH_SS_count = SH_SS_count "." H2

    print_line("<" tag ">")
    print_line("<A NAME=\"HDR" SH_SS_count "\">")
    print_line(s)
    print_line("</A>")
    print_line("</" tag ">")

    # Table of contents: close the previous item unless this is the first
    # subsection, which instead opens a nested list inside the item of
    # its parent section.
    if (In_TOC_Item && (H2 != 1))
 	print_toc("</LI>")
    if (H2 == 1)
 	print_toc("<UL>")
    In_TOC_Item = 1
    print_toc("<LI>")
    print_toc("<A HREF=\"#HDR" SH_SS_count "\">")
    print_toc(s)
    print_toc("</A>")
 }


 function cmd_sp()
 {
    # .sp nnn: vertical space.  There is no sensible HTML equivalent, so
    # the request is kept as a comment.
    cmd_comment($0)
 }


 function cmd_TH( line)
 {
    # .TH: emit the document head.  The title is the text of the current
    # line after the request name, with backslash-continued lines joined
    # on.  Also writes a <LINK REV="made"> element built from LOGNAME and
    # HOSTNAME, opens <BODY>, and sets TH_seen.
    #
    # The parameter is used only as a local variable: any argument passed
    # by a caller is overwritten with the current input line.
    end_font()
    print_line("<HTML>")
    print_line("<HEAD>")
    print_line("<TITLE>")
    line = $0
    # Join continuation lines.  The getline result is checked so that a
    # trailing backslash on the last input line cannot loop forever.
    while ((line ~ /\\$/) && ((getline) > 0))
        line = substr(line,1,length(line)-1) $0
    print_line(strtohtml(substr(line,4)))
    print_line("</TITLE>")
    print_line("<LINK REV=\"made\" HREF=\"mailto:" LOGNAME "@" HOSTNAME "\">")
    print_line("</HEAD>")
    print_line("")
    print_line("<BODY>")
    print_line("")
    TH_seen = 1
 }


 function cmd_TP()
 {
    # .TP: start a tagged-paragraph item.  The input line following the
    # request is read as the item label.  The first item at a list level
    # opens a new list: a bullet label selects <UL>/<LI>, anything else
    # selects <DL>/<DT>.  Later items close the previous <DD> or <LI>
    # before opening their own.
    end_font()
    getline		# this is the item label, usually "\(bu" or ".B ..."
    if (Item_Count[List_Level] == 0) # then first item of new list
    {
 	List_Level++
 	Item_Count[List_Level] = 0
        if ($0 == "\\(bu")
 	{
 	    List_Name[List_Level] = "UL"
 	    List_Item[List_Level] = "LI"
 	}
 	else
 	{
 	    List_Name[List_Level] = "DL"
 	    List_Item[List_Level] = "DT"
 	}
 	# The count was just reset above, so this test always succeeds here.
 	if (Item_Count[List_Level] == 0)
 	    print_line("<" List_Name[List_Level] ">")
    }
    Item_Count[List_Level]++
    if (List_Name[List_Level] == "DL")
    {
 	if (Item_Count[List_Level] > 1)
 	    print_line("</DD>")
 	print_line("<DT>")
 	# The label may itself be a font request: hand it to the same
 	# handler the main rules would use.
 	if ($0 ~ /^[.]B /)
 	    cmd_B()
 	else if ($0 ~ /^[.]I /)
 	    cmd_I()
 	else if ($0 ~ /^[.]R /)
 	    cmd_R()
 	else if ($0 ~ /^[.]BR/)
 	    cmd_XY("B","R")
 	else if ($0 ~ /^[.]BI/)
 	    cmd_XY("B","I")
 	else if ($0 ~ /^[.]IB/)
 	    cmd_XY("I","B")
 	else if ($0 ~ /^[.]IR/)
 	    cmd_XY("I","R")
 	else if ($0 ~ /^[.]RB/)
 	    cmd_XY("R","B")
 	else if ($0 ~ /^[.]RI/)
 	    cmd_XY("R","I")
 	else
 	    print_line(strtohtml($0))
 	end_font()
 	if (In_PRE)		# should not happen, but some man pages
 	    cmd_fi()		# are irregular
 	print_line("</DT>")
 	print_line("<DD>")
    }
    else			# must be <UL> <LI> ... </LI> </UL> type list
    {
 	# The bullet label itself is not printed; <LI> supplies the mark.
 	if (Item_Count[List_Level] > 1)
 	    print_line("</LI>")
 	print_line("<LI>")
    }
 }


 function cmd_TS( tbl_nroff_cmd)
 {
    # .TS ... .TE: a table cannot be translated directly.  Its source
    # lines, from the current .TS line through the .TE line (or end of
    # input), are copied to TBLFILE; tbl, nroff and col then format it as
    # plain text, which is included inside <PRE> ... </PRE>.
    do
    {
        print $0 >TBLFILE
    } while (($0 !~ /^[.]TE/) && ((getline) > 0))
    close (TBLFILE)

    tbl_nroff_cmd = "tbl " TBLFILE " | nroff -man | col -b"

    print_line("<PRE>")
    while ((tbl_nroff_cmd | getline) > 0)
        print_line(strtohtml($0))
    print_line("</PRE>")

    close (tbl_nroff_cmd)
    delete_file(TBLFILE)
 }


 function cmd_unknown( note)
 {
    # Any request without a handler: warn on stderr and preserve the
    # line in the output as an HTML comment.
    end_font()
    note = "Unrecognized nroff/troff command in [" $0 "] changed to comment"
    warning(note)
    cmd_comment($0)
 }


 function cmd_XY(x,y, font,k,opening,text,closing)
 {
    # Alternating-font requests (.BR, .IB, ...): the arguments are set
    # alternately in font x (2nd field, 4th, ...) and font y (3rd field,
    # 5th, ...), written on one output line with no separating blanks.
    end_font()
    protect_quoted_args()	# keep quoted arguments together as one field
    for (k = 2; k <= NF; k++)
    {
        if (k % 2)
            font = Font_Map[y]
        else
            font = Font_Map[x]
        opening = html_font_begintag(font)
        text = strtohtml(unprotect_quoted_arg($k))
        closing = html_font_endtag(font)
        printf("%s%s%s", opening, text, closing) > TMPFILE
    }
    print_line("")		# terminate the output line
 }


 function define(name, regexp,value)
 {
    # Record a troff string definition found on the current input line.
    #
    # Typical lines:
    #     .if n .ds Bi BibTeX
    #     .el .ds Bi BibTeX
    #
    # name is the string name (a field of the current line).  The value
    # is stored in Macro[], indexed by a regular expression matching the
    # way the string is referenced in text: \*(xx for a two-character
    # name, \*x for a one-character name.

    # The value is whatever follows ".ds name" and the blanks after it.
    # Locating it through the ".ds" request copes with names of any
    # length and with extra blanks, and cannot be fooled by the name
    # occurring earlier on the line.
    if (match($0,/[.]ds +[^ \t]+[ \t]*/))
        value = substr($0,RSTART+RLENGTH)
    else			# not expected: fall back to the fixed offset
        value = substr($0,index($0,name)+3)
    sub(/^"/,"",value)		# troff discards a leading double quote

    if (length(name) == 1)
        regexp = "\\\\\\*" name
    else
        regexp = "\\\\\\*\\(" name
    Macro[regexp] = value
 }


 function delete_file(s, command)
 {
    # Remove the file named s by running /bin/rm -f through the shell.
    command = "/bin/rm -f " s
    system(command)
 }


 function end_font()
 {
    # Close every font element still open, innermost first, leaving
    # Font_Level at zero.
    while (Font_Level > 0)
    {
        print_line(html_font_endtag(HTML_Font_Name[Font_Level]))
        Font_Level--
    }
 }


 function end_toc()
 {
    # Finish the table of contents and close its file.
    #
    # H2 > 0 means the last heading was a subsection, so a nested item
    # and list are still open inside the item of the enclosing section;
    # they must be closed before the outer item and list.
    if (H2 > 0)
    {
        print_toc("</LI>")
        print_toc("</UL>")
    }
    print_toc("</LI>")
    print_toc("</UL>")
    print_toc("<HR>")
    close (TOCFILE)
 }


 function end_TP( item,list)
 {
    # Close the list at the current level, if it has any items, then drop
    # back one list level (never below zero).
    if (Item_Count[List_Level] > 0)
    {
        if (List_Name[List_Level] == "DL")
        {
            item = "DD"
            list = "DL"
        }
        else
        {
            item = "LI"
            list = "UL"
        }
        print_line("</" item ">")
        print_line("</" list ">")
    }
    Item_Count[List_Level] = 0
    if (List_Level > 0)
        List_Level--
 }


 function font_sub(s, tag,letter,at)
 {
    # Replace each troff font escape \fX (X one of B C I P R S T) in s by
    # the corresponding HTML tag(s) and return the result.  The fonts
    # currently open are tracked in HTML_Font_Name[1..Font_Level].
    while (match(s,/\\f[BCIPRST]/))
    {
        at = RSTART
        letter = substr(s,at+2,1)
        if (letter != "P")	# set explicit font
        {
            Font_Level++
            HTML_Font_Name[Font_Level] = Font_Map[letter]
            tag = html_font_begintag(HTML_Font_Name[Font_Level])
            # Handle ...\fB...\fR... style: the font that was open is
            # ended, and the new font takes over its slot.
            if (Font_Level > 1)
            {
                tag = html_font_endtag(HTML_Font_Name[Font_Level-1]) tag
                HTML_Font_Name[Font_Level-1] = HTML_Font_Name[Font_Level]
                Font_Level--
            }
        }
        else			# \fP: revert to previous font
        {
            tag = html_font_endtag(HTML_Font_Name[Font_Level])
            if (Font_Level > 0)
                Font_Level--
        }
        s = substr(s,1,at-1) tag substr(s,at+3)
    }
    return (s)
 }


 function html_font_begintag(name)
 {
    # Return the opening tag <name>, or the empty string when name is
    # empty (a font with no HTML markup).
    return (name == "") ? "" : ("<" name ">")
 }


 function html_font_endtag(name)
 {
    # Return the closing tag </name>, or the empty string when name is
    # empty (a font with no HTML markup).
    return (name == "") ? "" : ("</" name ">")
 }


 function initialize()
 {
    # One-time setup, called from BEGIN: version strings, user, host and
    # date, the HTML level, the font and macro translation tables, the
    # work-file names, the URL and e-mail patterns, and finally the
    # output header.
    #
    # NOTE(review): awk applies command-line operands of the form
    # HTML=3 only after BEGIN has run, so the HTML level is visible here
    # only when given with -v; confirm how the wrapper script passes it.

    # Change these two lines whenever the program is modified
    VERSION_NUMBER = "1.06"
    VERSION_DATE = "[24-Oct-1997]"

    VERSION = "Version " VERSION_NUMBER " " VERSION_DATE

    # Each command is closed after its single line has been read.
    "echo $LOGNAME" | getline LOGNAME
    close("echo $LOGNAME")
    "hostname" | getline HOSTNAME
    close("hostname")
    "date" | getline DATE
    close("date")

    if (HTML == "")
        HTML = 2
    if ((HTML != 2) && (HTML != 3) && (HTML != "3.2"))
    {
        warning("Unsupported HTML level " HTML " requested: defaulting to HTML level 2")
        HTML = 2
    }

    # troff font letter -> HTML element name ("" means no markup)
    Font_Map["B"] = "STRONG"
    Font_Map["C"] = "TT"
    Font_Map["I"] = "EM"
    Font_Map["R"] = ""
    Font_Map["S"] = ""		# cannot map symbol font yet
    Font_Map["T"] = "TT"

    # Macro[] is indexed by a regular expression matching a troff escape;
    # the value is the replacement text handed to gsub().
    Macro["\\\\e"]	= "\\"
    if (HTML == 2)
        Macro["\\\\0"]	= "\\&#160;"	# change non-breakable space to numeric entity
    else if (HTML >= 3)
        Macro["\\\\0"]	= "\\&nbsp;"	# can finally use named entity
    else	# warning() takes a single argument, so the level is appended to the text
        warning("No conversion implemented for \\\\0 (non-breakable space) in HTML level " HTML)

    TOCFILE = "/tmp/man2html.toc"
    TBLFILE = "/tmp/man2html.tbl"
    TMPFILE = "/tmp/man2html.tmp"
    H1 = 0
    H2 = 0

    Macro["\\\\\\(bu"]	= "\\&#164;"
    Macro["\\\\\\(em"]	= "---"
    Macro["\\\\\\(en"]	= "--"

    # The following fragment for setting URL_xxx variables
    # is borrowed intact from my bibtex-to-html.awk file:
    #
    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps.  Unfortunately,
    # that address is no longer valid.  However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).

    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;".  We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files.  These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now.  The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.

    # Bug fix [24-Oct-1997]: Add < and > to the set of excluded
    # characters, to avoid incorrectly including SGML markup inside a
    # URL.  Before this fix, "\fChttp://www/\fP" got translated
    # incorrectly to
    #     <TT><A HREF="http://www/</TT>">http://www/</TT></A>
    # instead of the correct
    #     <TT><A HREF="http://www">http://www</A></TT>

    URL_PATTERN = "[A-Za-z]+://[^ \",&=|<>]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0

    E_MAIL_PATTERN = "[A-Za-z0-9_-]+@[A-Za-z0-9-]+([.][A-Za-z0-9-]+)*"
    E_MAIL_OFFSET = 0
    E_MAIL_PREFIX = "mailto:"
    E_MAIL_SAVE_LABEL = 0

    print_header()
 }


 function print_header( doctype)
 {
    # Write the leading comments that identify the generated file, then
    # the document type declaration for the selected HTML level.
    print_line("<!-- Warning: Do NOT edit this file. -->")
    print_line("<!-- It was created automatically by man2html.awk " VERSION " on " DATE " -->")
    print_line("<!-- from the file " strtohtml(FILENAME) " at " HOSTNAME " -->")
    print_line("")

    doctype = ""
    if (HTML == 2)
        doctype = "-//IETF//DTD HTML//EN"
    else if (HTML == 3)		# level 3 is needed only for &nbsp; and &shy;
        doctype = "-//IETF//DTD HTML 3.0//EN"
    else if (HTML == "3.2")	# HTML 3.2 released 5-Nov-1996 at http://www.w3.org/pub/WWW
        doctype = "-//W3C//DTD HTML 3.2//EN"

    if (doctype != "")
        print_line("<!DOCTYPE HTML public \"" doctype "\">")
 }


 function print_line(s)
 {
    # Write one line of output to the work file named by TMPFILE.
    print(s) > TMPFILE
 }


 function print_toc(s)
 {
    # Write one line to the table-of-contents file named by TOCFILE.
    print(s) > TOCFILE
 }


 function protect_quoted_args( inside,k,s, ch,out,len)
 {
    # Replace every blank lying between a pair of double quotes in the
    # current input line by the character \177, then store the line back
    # into $0 so that each quoted argument becomes a single field.
    # Lines without any double quote are left untouched.
    if (index($0,"\"") == 0)
        return

    s = $0
    len = length(s)
    out = ""
    inside = 0
    for (k = 1; k <= len; k++)
    {
        ch = substr(s,k,1)
        if (ch == "\"")
            inside = !inside
        else if (inside && (ch == " "))
            ch = "\177"
        out = out ch
    }
    $0 = out
 }


 function strtohtml(s, name)
 {
    # Convert one line of nroff/troff text to HTML and return it.
    # The steps, whose order matters, are: drop or simplify a few troff
    # escapes; protect the SGML special characters; translate literal
    # spaces and discretionary hyphens for the selected HTML level; hide
    # "--" (inside comments) or encode double quotes (elsewhere);
    # substitute the entries of Macro[]; translate font escapes; reduce
    # doubled backslashes; and, outside comments, wrap URLs and e-mail
    # addresses in anchors.

    gsub(/\\$/,"",s)		# discard backslash-newline
    gsub(/\\-/,"-",s)		# show troff minus as ASCII minus
    gsub(/\\[&]/,"",s)		# remove no-op macros
    # gsub(/\\[|]/," ",s)	# change thin space to space
    gsub(/\\[|]/,"",s)		# delete thin space (nroff does too)

    gsub(/[&]/,"\\&amp;",s)	# protect 3 or 4
    gsub(/</,"\\&lt;",s)	# special SGML
    gsub(/>/,"\\&gt;",s)	# characters

    if (HTML == 2)
    {
 	gsub(/\\ /,"\\&#160;",s)# represent literal space by numeric entity
 	gsub(/\\%/,"",s)	# squeeze out discretionary hyphens
    }
    else if (HTML >= 3)
    {
 	gsub(/\\ /,"\\&nbsp;",s) # preserve literal spaces

 	# NB: several browsers fail to implement soft hyphen properly: they show
 	# it as an explicit hyphen when the word is not broken at end of line,
 	# instead of discarding it.  We translate it correctly, and hope that
 	# broken browsers eventually get fixed, sigh...

 	gsub(/\\%/,"\\&shy;",s)	# discretionary hyphen -> soft hyphen
    }
    if (In_Comment)
 	gsub(/--/,"__",s)	# must hide -- pairs to avoid grammar error
    else if (HTML == "3.2")
 	gsub(/\"/,"\\&#34;",s)	# &quot; was left out of HTML 3.2, sigh...
    else
 	gsub(/\"/,"\\&quot;",s)	# but other versions, and SGML, have &quot;

    # It is curious that browsers can display a bullet, but there is no
    # HTML markup to represent it, and it is absent from the standard
    # ISO8859-1 fonts
    # gsub(/\\\(bu/,"\\&#164;",s)	# change bullets to general currency sign
 				# &curren; but use numeric code because
 				# xmosaic does not recognize it

    # Each index of Macro[] is a regular expression; its value is the
    # replacement text.
    for (name in Macro)		# substitute macro names
 	gsub(name,Macro[name],s)

    s = font_sub(s)

    gsub(/\\\\/,"\\",s)		# reduce troff doubled backslash to single HTML one

 #    if (index(s,"\\") > 0)	# check for anything we missed
 #	warning("Possible unrecognized nroff/troff markup in [" s "]")

    if (!In_Comment)		# no link inside comment; otherwise, browser shows text
    {
 	s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
 	s = anchor(s,"HREF",E_MAIL_PATTERN,E_MAIL_OFFSET,E_MAIL_PREFIX, \
 		   E_MAIL_SAVE_LABEL)
    }

    return (s)
 }


 function terminate( x,y,found)
 {
    # Finish the document and assemble the final output on stdout.
    #
    # The body was collected in TMPFILE and the table of contents in
    # TOCFILE.  The body is copied up to, but not including, its first
    # "<H1>" line; the table of contents is inserted there; the rest of
    # the body follows.  Both work files are then removed.
    #
    # A page without any section heading has no "<H1>" line and no table
    # of contents: its body is copied once and the contents file is
    # dropped, rather than repeating the last body line and appending
    # orphan list tags.
    print_line("</BODY>")
    print_line("</HTML>")
    close (TMPFILE)
    end_toc()

    found = 0
    while ((getline x < TMPFILE) > 0)
    {
        if (x == "<H1>")
        {
            found = 1
            break
        }
        print x
    }

    if (found)
    {
        while ((getline y < TOCFILE) > 0)
            print y

        print x			# the "<H1>" line that stopped the first loop
        while ((getline x < TMPFILE) > 0)
            print x
    }

    close (TOCFILE)
    delete_file(TOCFILE)
    close (TMPFILE)
    delete_file(TMPFILE)
 }


 function unprotect_quoted_arg(s)
 {
    # Undo protect_quoted_args() for a single argument: turn each \177
    # back into a blank and strip one enclosing double quote from either
    # end.
    gsub(/\177/," ",s)
    sub(/^"/,"",s)
    sub(/"$/,"",s)
    return (s)
 }


 function warning(message, where)
 {
    # Report message on standard error, prefixed with the current input
    # file name and line number.
    where = FILENAME ":" FNR
    print where ":%%" message >"/dev/stderr"
 }
 ### ====================================================================
 ###  @Awk-file{
 ###     author          = "Nelson H. F. Beebe",
 ###     version         = "1.06",
 ###     date            = "24 October 1997",
 ###     time            = "21:34:34 MDT",
 ###     filename        = "man2html.awk",
 ###     address         = "Center for Scientific Computing
 ###                        University of Utah
 ###                        Department of Mathematics, 105 JWB
 ###                        155 S 1400 E RM 233
 ###                        Salt Lake City, UT 84112-0090
 ###                        USA",
 ###     telephone       = "+1 801 581 5254",
 ###     FAX             = "+1 801 581 4148",
 ###     URL             = "http://www.math.utah.edu/~beebe",
 ###     checksum        = "01400 968 2975 23193",
 ###     email           = "beebe@math.utah.edu (Internet)",
 ###     codetable       = "ISO/ASCII",
 ###     keywords        = "nroff, troff, UNIX manual page",
 ###     supported       = "yes",
 ###     docstring       = "This program converts UNIX manual pages
 ###                        in nroff/troff markup to strictly-conformant
 ###                        HTML 2.0, 3.0, or 3.2.  [Actually, only two
 ###                        HTML 3.x entities (`&nbsp;' and `&shy;')
 ###                        are used, and those rarely; otherwise, the
 ###                        syntax conforms strictly to HTML 2.0.]
 ###
 ###                        Usage:
 ###                        	nawk -f man2html.awk [HTML=2|3|3.2] \
 ###                                manpage-file >html-file
 ###
 ###                        The single option, HTML=2, HTML=3, or
 ###                        HTML=3.2, selects the HTML grammar level.
 ###                        The default is HTML=2.
 ###
 ###                        This program is normally run via a shell
 ###                        wrapper that offers an option for setting the
 ###                        output file name.  It has been used to
 ###                        successfully convert entire man-page
 ###                        collections on several UNIX systems to HTML
 ###                        form for convenient World-Wide Web browser
 ###                        access.
 ###
 ###                        Of those nroff/troff commands defined in the
 ###                        -man format used for UNIX manual pages, only
 ###                        the most commonly-used ones are supported;
 ###                        unrecognized ones will be warned about, and
 ###                        preserved as HTML comments in the output.
 ###
 ###                        UNIX man pages tend to be written in a
 ###                        highly-stylized fashion that we apply
 ###                        heuristics to in order to recover high-level
 ###                        HTML structure from low-level nroff/troff
 ###                        markup.  Deviations from conventional
 ###                        man-page writing practice will likely result
 ###                        in less-than-perfect translation to HTML.
 ###
 ###                        Although there are several other `man2html'
 ###                        translators available on the Internet, this
 ###                        one is entirely of my own authorship, with no
 ###                        code borrowing from anywhere else.
 ###
 ###                        The checksum field above contains a CRC-16
 ###                        checksum as the first value, followed by the
 ###                        equivalent of the standard UNIX wc (word
 ###                        count) utility output of lines, words, and
 ###                        characters.  This is produced by Robert
 ###                        Solovay's checksum utility.",
 ###  }
 ### ====================================================================

 # Build the lookup tables and emit the HTML prologue before reading input.
 BEGIN { initialize() }

 # ---- conditionals and string definitions ----------------------------

 # ".ie t .ds ..." is the troff branch of an if-else: read past it and
 # fall through, so that the ".el .ds ..." line expected next is handled
 # by the following rule.
 /^[.]ie +t +[.]ds/ { getline }

 # nroff branch of the if-else.  The dot of ".ds" is matched literally; a
 # bare "." in the pattern would accept any character in that position.
 /^[.]el +[.]ds/ { define($3); next }

 /^[.']\\"/ { cmd_comment($0); next }	# keep troff comments as HTML comments

 /^[.]if +n *\\\{/ { cmd_comment($0); next }	# nroff block opener: body is processed normally

 /^[.]if +t *\\\{/ { cmd_comment_block($0); next }	# troff-only block: whole block becomes comments

 /^ *\\\}/ { cmd_comment($0); next }	# block closer

 /^[.]if +t/ { cmd_comment($0); next }	# single-line troff directive -> comment

 /^[.]if +n +[.]ds/ { define($4); next }

 /^[.]if +n +[.]ti/ { cmd_comment($0); next }	# nroff spacing directive -> comment

 /^[.]ie +n +[.]ds/ { define($4); next }

 # ---- -man macros: one handler function per request ------------------

 /^[.]SH/ { cmd_SH(); next }

 /^[.]SS/ { cmd_SS(); next }

 /^[.]TH/ { cmd_TH(); next }

 /^[.]B / { cmd_B(); next }

 /^[.]I / { cmd_I(); next }

 /^[.]IX / { cmd_IX(); next }

 /^[.]R / { cmd_R(); next }

 /^[.]ad/ { cmd_ad(); next }

 /^[.][BIR]$/ { cmd_BIR(); next }	# font request with no argument

 /^[.]BI / { cmd_XY("B","I"); next }

 /^[.]br/ { cmd_br(); next }

 /^[.]BR / { cmd_XY("B","R"); next }

 /^[.]ce[ 0-9]*$/ { cmd_ce(); next }

 /^[.]hw/ { cmd_hw(); next }

 /^[.]IB / { cmd_XY("I","B"); next }

 /^[.]IR / { cmd_XY("I","R"); next }

 /^[.]ne/ { cmd_ne(); next }

 /^[.]RB / { cmd_XY("R","B"); next }

 /^[.]RI / { cmd_XY("R","I"); next }

 /^[.]nf/ { cmd_nf(); next }

 /^[.]fi/ { cmd_fi(); next }

 /^[.]IP/ { cmd_IP(); next }

 /^[.]LP/ { cmd_LP(); next }

 /^[.]na/ { cmd_na(); next }

 /^[.]PP/ { cmd_PP(); next }

 /^[.]RE/ { cmd_RE(); next }

 /^[.]RS/ { cmd_RS(); next }

 /^[.]sp/ { cmd_sp(); next }

 /^[.]TP/ { cmd_TP(); next }

 /^[.]TS/ { cmd_TS(); next }

 # Any other request is reported and preserved as an HTML comment.
 /^[.][A-Za-z]/ { cmd_unknown(); next }

 # Everything else is running text.
 { print_line(strtohtml($0)) }

 # Merge the table of contents with the body and write the final page.
 END { terminate(); }


 # The anchor() function is adapted from my bibtex-to-html.awk file

 function anchor(s,type,pattern,offset,prefix,save_label, name,rstart,rlength)
 {
    # Wrap every match of pattern in s with <A type="prefix+match">...</A>
    # and return the resulting string.
    #
    #   s           text to scan
    #   type        anchor attribute name ("HREF" or "NAME"); "NAME"
    #               anchors additionally get <STRONG> around the text
    #   pattern     regular expression locating the text to anchor
    #   offset      number of leading characters of each match that are
    #               context only and stay outside the anchor
    #   prefix      text placed before the match inside the attribute
    #               value (e.g. "mailto:")
    #   save_label  not used here; passed through unchanged on recursion
    #
    # The remainder of the string after each match is handled by a
    # recursive call.

    # An empty match would never consume input, so stop rather than
    # recurse without end.
    if (!match(s,pattern) || (RLENGTH < 1))
        return (s)

    # Private copies are needed: the recursive call below overwrites the
    # RSTART and RLENGTH globals.
    rstart = RSTART + offset	# skip the leading context ...
    rlength = RLENGTH - offset	# ... and shorten the match accordingly

    name = substr(s,rstart,rlength)
    sub(/ +at +/,"@",name)	# reduce "user at host" to "user@host"

    return (substr(s,1,rstart-1) \
        "<A " type "=\"" prefix name "\">" \
        ((type == "NAME") ? "<STRONG>" : "") \
        substr(s,rstart,rlength) \
        ((type == "NAME") ? "</STRONG>" : "") \
        "</A>" \
        anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label))
 }


 function begin_toc( i,n,part)
 {
    # Open the table of contents: its heading, then the outermost item
    # list.  No item is open yet, so In_TOC_Item is cleared.
    n = split("<H1>|Table of contents|</H1>|<UL>",part,"|")
    for (i = 1; i <= n; i++)
        print_toc(part[i])
    In_TOC_Item = 0
 }


 function cmd_ad()
 {
    # .ad turns on adjustment (flush-left-and-right justification).
    # There is no HTML equivalent, so the request is kept as a comment.
    cmd_comment($0)
 }


 function cmd_B( s,k)
 {
    # .B text ...: emit the arguments of a bold request as <STRONG> text.
    #
    # A first argument that opens with a double quote is taken to run to
    # the end of the line (trailing quotes and blanks are trimmed).
    # Otherwise all whitespace-separated arguments are joined with single
    # blanks, so that ".B foo bar" yields "foo bar" rather than just
    # the first word.
    end_font()
    if (match($0,/^[.]B *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $2
        for (k = 3; k <= NF; ++k)
            s = s " " $k
    }
    print_line("<STRONG>" strtohtml(s) "</STRONG>")
 }


 function cmd_BIR( letter)
 {
    # A bare .B, .I or .R request (no argument) is remapped to the inline
    # font escape \fB, \fI or \fR and sent through strtohtml().
    end_font()
    letter = substr($0,2,1)
    print_line(strtohtml("\\f" letter))
 }


 function cmd_br()
 {
    # .br (line break) is handled exactly like a paragraph request.
    cmd_PP()
 }


 function cmd_ce( k,n)
 {
    # .ce nnn: centre the next nnn input lines (nnn = 0 turns centring
    # off).  A request without a count centres one line, which is the
    # troff default.
    #
    # The request itself is kept as a comment.  Each centred line is
    # followed by <BR>; for HTML 3.2 the lines are also wrapped in
    # <CENTER> ... </CENTER>, the HTML 3.2 shorthand for
    # <DIV ALIGN=CENTER> ... </DIV>.  (At the time of writing, amaya was
    # the only browser supporting DIV; grail, hotjava and netscape
    # recognised CENTER; arena, chimera, lynx and xmosaic recognised
    # neither.)

    n = (NF >= 2) ? $2 + 0 : 1
    cmd_comment($0)
    if (n <= 0)
        return

    if (HTML == "3.2")
        print_line("<CENTER>")
    # Stop early at end of input instead of re-emitting the last line.
    for (k = 1; (k <= n) && ((getline) > 0); ++k)
        print_line(strtohtml($0) "<BR>")
    if (HTML == "3.2")
        print_line("</CENTER>")
 }


 function cmd_comment(s)
 {
    # Emit s as an HTML comment.  The troff comment prefix is removed
    # first because it confuses html-pretty.  In_Comment is raised only
    # while strtohtml() runs, so that it applies its comment-specific
    # handling, and is cleared again afterwards.
    sub(/^[.']\\"/,"",s)
    In_Comment = 1
    s = strtohtml(s)
    In_Comment = 0
    print_line("<!-- " s " -->")
 }


 function cmd_comment_block(s)
 {
    # Turn s, and every following input line up to and including the
    # one that starts with optional blanks and \}, into HTML comments.
    cmd_comment(s)

    In_Comment = 1
    for (;;)
    {
        if ((getline s) <= 0)	# end of input (or read error)
            break
        cmd_comment(s)
        if (s ~ /^ *\\\}/)	# found end of block
            break
    }
    In_Comment = 0
 }


 function cmd_I( s,k)
 {
    # .I text ...: print the arguments of the current record in italic
    # (<EM>).  s and k are locals.
    #
    # If the first argument opens with a double quote, everything after
    # that quote is taken as the text, with trailing quotes and blanks
    # removed.  Otherwise ALL whitespace-separated arguments are joined
    # with single blanks (the man macros set every argument of .I, not
    # just the first); a field starting with \" begins a troff comment
    # and ends the text.
    end_font()
    if (match($0,/^[.]I *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = ""
        for (k = 2; k <= NF; ++k)
        {
            if ($k ~ /^\\"/)	# rest of line is a troff comment
                break
            s = (k == 2) ? $k : (s " " $k)
        }
    }
    print_line("<EM>" strtohtml(s) "</EM>")
 }


 function cmd_IX()
 {
    # .IX index entry lines are simply discarded, together with any
    # continuation lines: as long as the current record ends in a
    # backslash, the next record is read and dropped as well.
    for (;;)
    {
        if (!match($0,/\\$/))
            break
        if ((getline) <= 0)
            break
    }
 }


 function cmd_fi()
 {
    # .fi: close an open <PRE> environment; when none is open, the
    # request is only recorded as a comment.
    end_font()
    if (!In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("</PRE>")
    In_PRE = 0
 }


 function cmd_hw()
 {
    # .hw word-hyph-en-a-tion ex-cep-tions: kept only as a comment
    cmd_comment($0)
 }


 function cmd_IP()
 {
    # .IP: close any open font markup, count the paragraph in PP, and
    # start a new HTML paragraph.
    end_font()
    PP = PP + 1
    print_line("<P>")
 }


 function cmd_LP()
 {
    # .LP: close any open font markup, count the paragraph in PP, and
    # start a new HTML paragraph.
    end_font()
    PP = PP + 1
    print_line("<P>")
 }


 function cmd_na()
 {
    # .na: no adjust: turn off flush-left-and-right justification,
    # producing ragged-right.  There is no HTML equivalent, so the
    # request line is only recorded as a comment.
    cmd_comment($0)
 }


 function cmd_ne()
 {
    # .ne dimen: need dimen vertical space before end of page,
    # otherwise force a page break (e.g. to prevent page breaks after
    # headings).  The request line is only recorded as a comment.
    cmd_comment($0)
 }


 function cmd_nf()
 {
    # .nf: open a <PRE> environment; when one is already open, the
    # request is only recorded as a comment.
    end_font()
    if (!In_PRE)
    {
        print_line("<PRE>")
        In_PRE = 1
        return
    }
    cmd_comment($0)
 }


 function cmd_PP()
 {
    # .PP: paragraph break.  Closes open font markup, counts the
    # paragraph in PP, emits the break, and then ends the current
    # tagged-paragraph list level via end_TP().
    end_font()
    PP++

    # <P> tags are illegal in <PRE>...</PRE> environments, so an empty
    # line is written there instead
    print_line(In_PRE ? "" : "<P>")

    end_TP()
 }


 function cmd_R( s,k)
 {
    # .R text ...: print the arguments of the current record without
    # any font markup.  s and k are locals.
    #
    # If the first argument opens with a double quote, everything after
    # that quote is taken as the text, with trailing quotes and blanks
    # removed.  Otherwise ALL whitespace-separated arguments are joined
    # with single blanks, instead of keeping only the first one; a
    # field starting with \" begins a troff comment and ends the text.
    end_font()
    if (match($0,/^[.]R *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = ""
        for (k = 2; k <= NF; ++k)
        {
            if ($k ~ /^\\"/)	# rest of line is a troff comment
                break
            s = (k == 2) ? $k : (s " " $k)
        }
    }
    print_line(strtohtml(s))
 }


 function cmd_RE()
 {
    # .RE: end the innermost .RS block.  Lists opened since the
    # matching .RS are closed first, then the <BLOCKQUOTE> opened by
    # cmd_RS() is closed.
    #
    # An .RE without an open .RS (RSE_Level is zero) has no
    # <BLOCKQUOTE> to close; emitting the end tag anyway would produce
    # unbalanced HTML, so such a request is only recorded as a comment.
    end_font()
    if (In_PRE)			# should not happen, but some man pages
        cmd_fi()		# are irregular
    while (List_Level > RSE_List_Level[RSE_Level])
        end_TP()
    if (RSE_Level > 0)
    {
        RSE_Level--
        print_line("</BLOCKQUOTE>")
    }
    else
        cmd_comment($0)
 }


 function cmd_RS()
 {
    # .RS: start a nested block.  The list level in force on entry is
    # remembered per nesting level so that cmd_RE() can close only the
    # lists opened inside the block.
    end_font()

    RSE_Level++
    RSE_List_Level[RSE_Level] = List_Level

    List_Level++		# new .TP level too
    print_line("<BLOCKQUOTE>")
 }


 function cmd_SH( s)
 {
    # .SH: section heading, emitted as a level-1 HTML heading
    cmd_SH_SS("H1")
 }


 function cmd_SS( s)
 {
    # .SS: subsection heading, emitted as a level-2 HTML heading
    cmd_SH_SS("H2")
 }


 function cmd_SH_SS(tag, s)
 {
    # [Sub]section heading.  tag is "H1" for .SH and "H2" for .SS; s is
    # a local holding the converted heading text.  The heading is
    # written to the document with a named anchor, and a matching link
    # entry is written to the table of contents.

    # A heading before any .TH should not happen, but some man pages
    # are irregular
    if (!TH_seen)
        cmd_TH(substr($0,5))

    # Close everything that is still open from the previous section
    end_font()
    while (RSE_Level > 0)
        cmd_RE()
    while (List_Level > 0)
        end_TP()

    # Update the section counters
    if (tag == "H2")
        H2++
    else if (tag == "H1")
    {
        H1++
        if (H1 == 1)
            begin_toc()
        if (H2 > 0)		# close the subsection list in the TOC
        {
            print_toc("</LI>")
            print_toc("</UL>")
        }
        H2 = 0
        if (H1 > 1)
            print_line("<HR>")	# a separating horizontal rule is a nice touch
    }

    # Heading text: the record after the request name, minus one
    # optional pair of surrounding quotes
    s = substr($0,5)
    sub(/^ *\"/,"",s)
    sub(/\" *$/,"",s)
    s = strtohtml(s)

    # Anchor suffix: ".section" or ".section.subsection"
    SH_SS_count = "." H1
    if (H2 > 0)
        SH_SS_count = SH_SS_count "." H2

    # The heading itself
    print_line("<" tag ">")
    print_line("<A NAME=\"HDR" SH_SS_count "\">")
    print_line(s)
    print_line("</A>")
    print_line("</" tag ">")

    # The table-of-contents entry: the first subsection opens a nested
    # list; any other heading first closes the previous entry
    if (H2 == 1)
        print_toc("<UL>")
    else if (In_TOC_Item)
        print_toc("</LI>")
    In_TOC_Item = 1
    print_toc("<LI>")
    print_toc("<A HREF=\"#HDR" SH_SS_count "\">")
    print_toc(s)
    print_toc("</A>")
 }


 function cmd_sp()
 {
    # .sp nnn: vertical space.  There is no sensible HTML equivalent,
    # so the request line is only recorded as a comment.
    cmd_comment($0)
 }


 function cmd_TH( line)
 {
    # .TH: title heading.  Writes the HTML <HEAD> section, using the
    # text after the request name as the <TITLE>, then opens <BODY>
    # and records in TH_seen that the heading has been produced.
    # line is a local; any value passed by a caller is overwritten.
    end_font()
    print_line("<HTML>")
    print_line("<HEAD>")
    print_line("<TITLE>")
    line = $0
    # Join backslash-continued lines.  The getline result is checked so
    # that a trailing backslash on the last input line cannot cause an
    # endless loop that keeps re-appending the same record.
    while (line ~ /\\$/)
    {
        line = substr(line,1,length(line)-1)
        if ((getline) <= 0)
            break
        line = line $0
    }
    print_line(strtohtml(substr(line,4)))
    print_line("</TITLE>")
    print_line("<LINK REV=\"made\" HREF=\"mailto:" LOGNAME "@" HOSTNAME "\">")
    print_line("</HEAD>")
    print_line("")
    print_line("<BODY>")
    print_line("")
    TH_seen = 1
 }


 function cmd_TP()
 {
    # .TP: tagged paragraph.  The next input line is the item label,
    # usually "\(bu" or ".B ...".  The first item at a level opens a
    # new list: <UL> when the label is exactly \(bu, <DL> otherwise.
    # Later items reuse the list type chosen by the first one.
    end_font()
    if ((getline) <= 0)		# input ended before the label line:
        return			# there is no item to emit
    if (Item_Count[List_Level] == 0) # then first item of new list
    {
        List_Level++
        Item_Count[List_Level] = 0
        if ($0 == "\\(bu")
        {
            List_Name[List_Level] = "UL"
            List_Item[List_Level] = "LI"
        }
        else
        {
            List_Name[List_Level] = "DL"
            List_Item[List_Level] = "DT"
        }
        print_line("<" List_Name[List_Level] ">")
    }
    Item_Count[List_Level]++
    if (List_Name[List_Level] == "DL")
    {
        if (Item_Count[List_Level] > 1)
            print_line("</DD>")	# close the previous item's body
        print_line("<DT>")
        # The label may itself be a font request
        if ($0 ~ /^[.]B /)
            cmd_B()
        else if ($0 ~ /^[.]I /)
            cmd_I()
        else if ($0 ~ /^[.]R /)
            cmd_R()
        else if ($0 ~ /^[.](BR|BI|IB|IR|RB|RI)/)
            cmd_XY(substr($0,2,1),substr($0,3,1)) # alternating fonts
        else
            print_line(strtohtml($0))
        end_font()
        if (In_PRE)		# should not happen, but some man pages
            cmd_fi()		# are irregular
        print_line("</DT>")
        print_line("<DD>")
    }
    else			# must be <UL> <LI> ... </LI> </UL> type list
    {
        if (Item_Count[List_Level] > 1)
            print_line("</LI>")
        print_line("<LI>")
    }
 }


 function cmd_TS( tbl_nroff_cmd)
 {
    # .TS ... .TE: table.  The table source is copied to TBLFILE,
    # formatted by an external tbl | nroff | col pipeline, and the
    # resulting plain text is included as a <PRE> environment.
    # tbl_nroff_cmd is a local holding the pipeline command.

    # Copy the table, from the current record through the .TE line
    # (or end of input), to the temporary file
    print $0 >TBLFILE
    for (;;)
    {
        if ((getline) <= 0)
            break
        print $0 >TBLFILE
        if ($0 ~ /^[.]TE/)	# then end of table found
            break
    }
    close (TBLFILE)

    tbl_nroff_cmd = "tbl " TBLFILE " | nroff -man | col -b"

    # Include the formatted text
    print_line("<PRE>")
    while ((tbl_nroff_cmd | getline) > 0)
        print_line(strtohtml($0))
    print_line("</PRE>")

    close (tbl_nroff_cmd)
    delete_file(TBLFILE)
 }


 function cmd_unknown( complaint)
 {
    # Any request without a handler: report it on the warning channel
    # and keep the line in the output as a comment.  complaint is a
    # local.
    end_font()
    complaint = "Unrecognized nroff/troff command in [" $0 "] changed to comment"
    warning(complaint)
    cmd_comment($0)
 }


 function cmd_XY(x,y, font,k,  piece)
 {
    # Alternating-font requests such as .BR or .IB: x and y are the two
    # troff font letters.  Arguments in even field positions (the first
    # argument is field 2) use font x, those in odd positions use font
    # y.  All pieces are written without separators, followed by one
    # line terminator.  font, k and piece are locals.
    end_font()
    protect_quoted_args()	# keep quoted arguments together as one field
    for (k = 2; k <= NF; ++k)
    {
        if (k % 2)
            font = Font_Map[y]
        else
            font = Font_Map[x]
        piece = html_font_begintag(font) \
                strtohtml(unprotect_quoted_arg($k)) \
                html_font_endtag(font)
        printf("%s", piece) > TMPFILE
    }
    print_line("")
 }


 function define(name, regexp,  start)
 {
    # Record a string definition from the current record in Macro[].
    # Typical records:
    #     .if n .ds Bi BibTeX
    #     .el .ds Bi BibTeX
    # The string is used in the text as \*(Bi, but is stored under a
    # regular expression that matches that usage.  The value is the
    # rest of the record starting three characters after the first
    # occurrence of name.  regexp and start are locals.
    start = index($0,name) + 3
    regexp = "\\\\\\*\\(" name
    Macro[regexp] = substr($0,start)
 }


 function delete_file(s,  command)
 {
    # Remove the file named s by running /bin/rm -f through the shell.
    # The name is appended to the command unquoted.  command is a
    # local.
    command = "/bin/rm -f " s
    system(command)
 }


 function end_font()
 {
    # Close every font that is still open, innermost first, leaving
    # Font_Level at zero.
    while (Font_Level > 0)
    {
        print_line(html_font_endtag(HTML_Font_Name[Font_Level]))
        Font_Level--
    }
 }


 function end_toc()
 {
    # Finish the table of contents and close TOCFILE.
    #
    # Every list that cmd_SH_SS() and begin_toc() left open is closed:
    # first the nested subsection list, if the document ended inside a
    # subsection (H2 > 0), then the top-level entry and list, which
    # exist only when at least one section heading was seen (H1 > 0).
    # The separating rule is always written, so TOCFILE is (re)created
    # even for documents without headings.
    if (H2 > 0)
    {
        print_toc("</LI>")
        print_toc("</UL>")
    }
    if (H1 > 0)
    {
        print_toc("</LI>")
        print_toc("</UL>")
    }
    print_toc("<HR>")
    close (TOCFILE)
 }


 function end_TP( is_dl)
 {
    # End the list at the current level: if it holds any items, close
    # the last item and the list itself, using the tags that match the
    # list type.  The item count of the level is reset, and the level
    # is popped unless it is already zero.  is_dl is a local.
    if (Item_Count[List_Level] > 0)
    {
        is_dl = (List_Name[List_Level] == "DL")
        print_line(is_dl ? "</DD>" : "</LI>")
        print_line(is_dl ? "</DL>" : "</UL>")
    }
    Item_Count[List_Level] = 0
    if (List_Level > 0)
        List_Level--
 }


 function font_sub(s, tag,  code)
 {
    # Replace each troff font escape \fX (X one of B C I P R S T) in s
    # by HTML markup and return the result.  The open font is tracked
    # in Font_Level and HTML_Font_Name[].  tag and code are locals.
    while (match(s,/\\f[BCIPRST]/))
    {
        code = substr(s,RSTART+2,1)
        if (code != "P")	# set explicit font
        {
            Font_Level++
            HTML_Font_Name[Font_Level] = Font_Map[code]
            tag = html_font_begintag(HTML_Font_Name[Font_Level])
            # Handle ...\fB...\fR... style by ending previous font:
            # the new font takes over the previous level instead of
            # nesting inside it
            if (Font_Level > 1)
            {
                tag = html_font_endtag(HTML_Font_Name[Font_Level-1]) tag
                HTML_Font_Name[Font_Level-1] = HTML_Font_Name[Font_Level]
                Font_Level--
            }
        }
        else			# \fP: revert to previous font
        {
            tag = html_font_endtag(HTML_Font_Name[Font_Level])
            if (Font_Level > 0)
                Font_Level--
        }
        # splice the markup in place of the three-character escape
        s = substr(s,1,RSTART-1) tag substr(s,RSTART+3)
    }
    return (s)
 }


 function html_font_begintag(name)
 {
    # Return the opening tag <name>, or the empty string when name is
    # empty (a font with no HTML markup).
    return (name == "") ? "" : ("<" name ">")
 }


 function html_font_endtag(name)
 {
    # Return the closing tag </name>, or the empty string when name is
    # empty (a font with no HTML markup).
    return (name == "") ? "" : ("</" name ">")
 }


 function initialize()
 {
    # One-time setup: version strings, user/host/date stamps obtained
    # from external commands, validation of the requested HTML level,
    # the troff-font-to-HTML map, the built-in Macro[] substitutions,
    # temporary file names, heading counters, and the URL and e-mail
    # patterns used for automatic anchors.  Ends by writing the
    # document header.

    # Change these two lines whenever the program is modified
    VERSION_NUMBER = "1.06"
    VERSION_DATE = "[24-Oct-1997]"

    VERSION = "Version " VERSION_NUMBER " " VERSION_DATE

    # Each command is read once and then closed, so that no pipes are
    # left open for the rest of the run
    "echo $LOGNAME" | getline LOGNAME
    close ("echo $LOGNAME")
    "hostname" | getline HOSTNAME
    close ("hostname")
    "date" | getline DATE
    close ("date")

    if (HTML == "")
        HTML = 2
    if ((HTML != 2) && (HTML != 3) && (HTML != "3.2"))
    {
        warning("Unsupported HTML level " HTML " requested: defaulting to HTML level 2")
        HTML = 2
    }

    Font_Map["B"] = "STRONG"
    Font_Map["C"] = "TT"
    Font_Map["I"] = "EM"
    Font_Map["R"] = ""
    Font_Map["S"] = ""		# cannot map symbol font yet
    Font_Map["T"] = "TT"

    Macro["\\\\e"]	= "\\"
    if (HTML == 2)
        Macro["\\\\0"]	= "\\&#160;"	# change non-breakable space to numeric entity
    else if (HTML >= 3)
        Macro["\\\\0"]	= "\\&nbsp;"	# can finally use named entity
    else
        # warning() declares a single parameter, so the level is
        # concatenated into the message rather than passed as a second
        # argument (calling with excess arguments is not portable)
        warning("No conversion implemented for \\\\0 (non-breakable space) in HTML level " HTML)

    TOCFILE = "/tmp/man2html.toc"
    TBLFILE = "/tmp/man2html.tbl"
    TMPFILE = "/tmp/man2html.tmp"
    H1 = 0
    H2 = 0

    Macro["\\\\\\(bu"]	= "\\&#164;"
    Macro["\\\\\\(em"]	= "---"
    Macro["\\\\\\(en"]	= "--"

    # The following fragment for setting URL_xxx variables
    # is borrowed intact from my bibtex-to-html.awk file:
    #
    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps.  Unfortunately,
    # that address is no longer valid.  However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).

    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;".  We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files.  These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now.  The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.

    # Bug fix [24-Oct-1997]: Add < and > to the set of excluded
    # characters, to avoid incorrectly including SGML markup inside a
    # URL.  Before this fix, "\fChttp://www/\fP" got translated
    # incorrectly to
    #     <TT><A HREF="http://www/</TT>">http://www/</TT></A>
    # instead of the correct
    #     <TT><A HREF="http://www">http://www</A></TT>

    URL_PATTERN = "[A-Za-z]+://[^ \",&=|<>]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0

    E_MAIL_PATTERN = "[A-Za-z0-9_-]+@[A-Za-z0-9-]+([.][A-Za-z0-9-]+)*"
    E_MAIL_OFFSET = 0
    E_MAIL_PREFIX = "mailto:"
    E_MAIL_SAVE_LABEL = 0

    print_header()
 }


 function print_header( source_name)
 {
    # Write the leading comment lines (generator, date, source file,
    # host) and the <!DOCTYPE> declaration that matches the selected
    # HTML level.  source_name is a local.

    # The file name ends up inside an HTML comment, so it is converted
    # with In_Comment raised: strtohtml() then hides "--" pairs and
    # does not wrap anything in anchors, exactly as for other comments.
    In_Comment = 1
    source_name = strtohtml(FILENAME)
    In_Comment = 0

    print_line("<!-- Warning: Do NOT edit this file. -->")
    print_line("<!-- It was created automatically by man2html.awk " VERSION " on " DATE " -->")
    print_line("<!-- from the file " source_name " at " HOSTNAME " -->")
    print_line("")

    if (HTML == 2)
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">")
    else if (HTML == 3)	# We need level 3 HTML only because of our use of &nbsp; and &shy;
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML 3.0//EN\">")
    else if (HTML == "3.2")	# HTML 3.2 released 5-Nov-1996 at http://www.w3.org/pub/WWW
        print_line("<!DOCTYPE HTML public \"-//W3C//DTD HTML 3.2//EN\">")
 }


 function print_line(s)
 {
    # Append s as one output record to the document body file
    print s > TMPFILE
 }


 function print_toc(s)
 {
    # Append s as one output record to the table-of-contents file
    print s > TOCFILE
 }


 function protect_quoted_args( inside,k,s,  c,n)
 {
    # Replace every blank that lies between double quotes in the
    # current record by the DEL character (octal 177), so that field
    # splitting keeps each quoted argument in one field.  The record is
    # left untouched when it contains no double quote.  All parameters
    # are locals.
    if (index($0,"\"") == 0)
        return

    s = $0
    n = length(s)		# replacements never change the length
    inside = 0			# nonzero while between a pair of quotes
    for (k = 1; k <= n; ++k)
    {
        c = substr(s,k,1)
        if (c == "\"")
            inside = !inside
        else if (inside && (c == " "))
            s = substr(s,1,k-1) "\177" substr(s,k+1)
    }
    $0 = s			# reassigning the record re-splits the fields
 }


 function strtohtml(s, name)
 {
    # Convert one line of troff text, s, to HTML and return the result.
    # In order: simple troff escapes are removed or translated, the
    # SGML special characters are turned into entities, literal spaces
    # and discretionary hyphens are mapped according to the HTML level,
    # double quotes (or "--" pairs, inside comments) are made safe,
    # every entry of Macro[] is substituted, font escapes are handled
    # by font_sub(), doubled backslashes are reduced, and finally,
    # outside comments, URLs and e-mail addresses are wrapped in
    # anchors.  name is a local used to loop over Macro[].
    #
    # The order of the substitutions matters: for example, & must be
    # protected before any entity containing & is introduced.

    gsub(/\\$/,"",s)		# discard backslash-newline
    gsub(/\\-/,"-",s)		# show troff minus as ASCII minus
    gsub(/\\[&]/,"",s)		# remove no-op macros
    # gsub(/\\[|]/," ",s)	# change thin space to space
    gsub(/\\[|]/,"",s)		# delete thin space (nroff does too)

    gsub(/[&]/,"\\&amp;",s)	# protect 3 or 4
    gsub(/</,"\\&lt;",s)	# special SGML
    gsub(/>/,"\\&gt;",s)	# characters

    if (HTML == 2)
    {
 	gsub(/\\ /,"\\&#160;",s)# represent literal space by numeric entity
 	gsub(/\\%/,"",s)	# squeeze out discretionary hyphens
    }
    else if (HTML >= 3)
    {
 	gsub(/\\ /,"\\&nbsp;",s) # preserve literal spaces

 	# NB: several browsers fail to implement soft hyphen properly: they show
 	# it as an explicit hyphen when the word is not broken at end of line,
 	# instead of discarding it.  We translate it correctly, and hope that
 	# broken browsers eventually get fixed, sigh...

 	gsub(/\\%/,"\\&shy;",s)	# discretionary hyphen -> soft hyphen
    }
    if (In_Comment)
 	gsub(/--/,"__",s)	# must hide -- pairs to avoid grammar error
    else if (HTML == "3.2")
 	gsub(/\"/,"\\&#34;",s)	# &quot; was left out of HTML 3.2, sigh...
    else
 	gsub(/\"/,"\\&quot;",s)	# but other versions, and SGML, have &quot;

    # It is curious that browsers can display a bullet, but there is no
    # HTML markup to represent it, and it is absent from the standard
    # ISO8859-1 fonts
    # gsub(/\\\(bu/,"\\&#164;",s)	# change bullets to general currency sign
 				# &curren; but use numeric code because
 				# xmosaic does not recognize it

    # Each index of Macro[] is used as a regular expression and each
    # value as the gsub() replacement text
    for (name in Macro)		# substitute macro names
 	gsub(name,Macro[name],s)

    s = font_sub(s)

    gsub(/\\\\/,"\\",s)		# reduce troff doubled backslash to single HTML one

 #    if (index(s,"\\") > 0)	# check for anything we missed
 #	warning("Possible unrecognized nroff/troff markup in [" s "]")

    if (!In_Comment)		# no link inside comment; otherwise, browser shows text
    {
 	# URLs first, then e-mail addresses
 	s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
 	s = anchor(s,"HREF",E_MAIL_PATTERN,E_MAIL_OFFSET,E_MAIL_PREFIX, \
 		   E_MAIL_SAVE_LABEL)
    }

    return (s)
 }


 function terminate( x,y,  found)
 {
    # Finish the document and assemble the final output on standard
    # output: the body is copied from TMPFILE, with the table of
    # contents from TOCFILE inserted immediately before the first
    # line that is exactly "<H1>".  Both temporary files are removed
    # afterwards.  x, y and found are locals.
    #
    # When the body contains no "<H1>" line there is no insertion
    # point: the whole body has then already been copied by the first
    # loop, so nothing more is printed.  (Printing x again would
    # duplicate the last body line, and the table of contents would
    # land after </HTML>.)
    print_line("</BODY>")
    print_line("</HTML>")
    close (TMPFILE)
    end_toc()

    # Copy the body up to, but not including, the first heading
    found = 0
    while ((getline x < TMPFILE) > 0)
    {
        if (x == "<H1>")
        {
            found = 1
            break
        }
        print x
    }

    # Insert the table of contents
    if (found)
    {
        while ((getline y < TOCFILE) > 0)
            print y
    }
    close (TOCFILE)
    delete_file(TOCFILE)

    # Copy the pending heading line and the rest of the body
    if (found)
    {
        print x
        while ((getline x < TMPFILE) > 0)
            print x
    }
    close (TMPFILE)
    delete_file(TMPFILE)
 }


 function unprotect_quoted_arg(s)
 {
    # Undo protect_quoted_args() for a single field: strip one leading
    # and one trailing double quote, if present, and turn each DEL
    # character (octal 177) back into a blank.
    if (substr(s,1,1) == "\"")
        s = substr(s,2)
    if (s ~ /"$/)
        s = substr(s,1,length(s)-1)
    gsub(/\177/," ",s)
    return (s)
 }


 function warning(message,  text)
 {
    # Report message on the standard error stream, prefixed with the
    # current input file name and record number.  text is a local.
    text = FILENAME ":" FNR ":%%" message
    print text > "/dev/stderr"
 }