|
1 | | -#! /usr/local/bin/python |
2 | | - |
3 | | -# Convert the Python FAQ to HTML |
4 | | - |
5 | | -import string |
6 | | -import regex |
7 | | -import regsub |
8 | | -import sys |
9 | | -import os |
10 | | - |
# Name of the plain-text FAQ file that main() reads; the HTML is written to
# FAQ + '.html'.
FAQ = 'FAQ'

# Line classifiers.  These use regex's default syntax, in which grouping and
# alternation are spelled \( \) and \|.  match() returns the length of the
# match, or -1 when the pattern does not match.

# "<n>. " at the start of a line: a chapter heading.
chapterprog = regex.compile('^\([1-9][0-9]*\)\. ')
# "<n>.<m>. " at the start of a line: an already-numbered question.
questionprog = regex.compile('^\([1-9][0-9]*\)\.\([1-9][0-9]*\)\. ')
# "Q. " at the start of a line: a question that has no number yet.
newquestionprog = regex.compile('^Q\. ')
# A line holding nothing but blanks and tabs.
blankprog = regex.compile('^[ \t]*$')
# A line that is indented, or blank.
indentedorblankprog = regex.compile('^\([ \t]+\|[ \t]*$\)')
# A run of '=' characters, as used to underline a heading.
underlineprog = regex.compile('^==*$')
# Eight leading blanks, or optional blanks followed by a tab.  The run of
# eight blanks is what the name and the "indented by >= 8 spaces" rule in
# main() call for; a single blank here would match any indented line.
eightblanksprog = regex.compile('^\(        \| *\t\)')
# Mail/news header lines found at the top of the posted FAQ.
mailheaderprog = regex.compile('^\(Subject\|Newsgroups\|Followup-To\|From\|Reply-To\|Approved\|Archive-name\|Version\|Last-modified\): +')

# In-line constructs that main() rewrites during its final cleanup pass.
urlprog = regex.compile('<URL:\([^>]*\)>')
ampprog = regex.compile('&')
aprog = regex.compile('^A\. +')
qprog = regex.compile('>Q\. +')
qrefprog = regex.compile('question +\([0-9]\.[0-9]+\)')
versionprog = regex.compile('^Version: ')
emailprog = regex.compile('<\([^>@:]+@[^>@:]+\)>')
28 | | - |
29 | | -def main(): |
30 | | - print 'Reading lines...' |
31 | | - lines = open(FAQ, 'r').readlines() |
32 | | - print 'Renumbering in memory...' |
33 | | - oldlines = lines[:] |
34 | | - after_blank = 1 |
35 | | - chapter = 0 |
36 | | - question = 0 |
37 | | - chapters = ['<OL>'] |
38 | | - questions = ['<OL>'] |
39 | | - for i in range(len(lines)): |
40 | | - line = lines[i] |
41 | | - if after_blank: |
42 | | - n = chapterprog.match(line) |
43 | | - if n >= 0: |
44 | | - chapter = chapter + 1 |
45 | | - if chapter != 1: |
46 | | - questions.append('</UL>\n') |
47 | | - question = 0 |
48 | | - lines[i] = '<H2>' + line[n:-1] + '</H2>\n' |
49 | | - chapters.append('<LI> ' + line[n:]) |
50 | | - questions.append('<LI> ' + line[n:]) |
51 | | - questions.append('<UL>\n') |
52 | | - afterblank = 0 |
53 | | - continue |
54 | | - n = underlineprog.match(line) |
55 | | - if n >= 0: |
56 | | - lines[i] = '' |
57 | | - continue |
58 | | - n = questionprog.match(line) |
59 | | - if n < 0: n = newquestionprog.match(line) - 3 |
60 | | - if n >= 0: |
61 | | - question = question + 1 |
62 | | - number = '%d.%d'%(chapter, question) |
63 | | - lines[i] = '<A NAME="' + number + '"><H3>' + line[n:] |
64 | | - questions.append('<LI><A HREF="#' + \ |
65 | | - number + '">' + line[n:]) |
66 | | - # Add up to 4 continuations of the question |
67 | | - n = len(number) |
68 | | - for j in range(i+1, i+5): |
69 | | - if blankprog.match(lines[j]) >= 0: |
70 | | - lines[j-1] = lines[j-1] + '</H3></A>' |
71 | | - questions[-1] = \ |
72 | | - questions[-1][:-1] + '</A>\n' |
73 | | - break |
74 | | - questions.append(' '*(n+2) + lines[j]) |
75 | | - afterblank = 0 |
76 | | - continue |
77 | | - afterblank = (blankprog.match(line) >= 0) |
78 | | - print 'Inserting list of chapters...' |
79 | | - chapters.append('</OL>\n') |
80 | | - for i in range(len(lines)): |
81 | | - line = lines[i] |
82 | | - if regex.match( |
83 | | - '^This FAQ is divided in the following chapters', |
84 | | - line) >= 0: |
85 | | - i = i+1 |
86 | | - while 1: |
87 | | - line = lines[i] |
88 | | - if indentedorblankprog.match(line) < 0: |
89 | | - break |
90 | | - del lines[i] |
91 | | - lines[i:i] = chapters |
92 | | - break |
93 | | - else: |
94 | | - print '*** Can\'t find header for list of chapters' |
95 | | - print '*** Chapters found:' |
96 | | - for line in chapters: print line, |
97 | | - print 'Inserting list of questions...' |
98 | | - questions.append('</UL></OL>\n') |
99 | | - for i in range(len(lines)): |
100 | | - line = lines[i] |
101 | | - if regex.match('^Here.s an overview of the questions', |
102 | | - line) >= 0: |
103 | | - i = i+1 |
104 | | - while 1: |
105 | | - line = lines[i] |
106 | | - if indentedorblankprog.match(line) < 0: |
107 | | - break |
108 | | - del lines[i] |
109 | | - lines[i:i] = questions |
110 | | - break |
111 | | - else: |
112 | | - print '*** Can\'t find header for list of questions' |
113 | | - print '*** Questions found:' |
114 | | - for line in questions: print line, |
115 | | - # final cleanup |
116 | | - print "Final cleanup..." |
117 | | - doingpre = 0 |
118 | | - for i in range(len(lines)): |
119 | | - # set lines indented by >= 8 spaces using PRE |
120 | | - # blank lines either terminate PRE or separate paragraphs |
121 | | - n = eightblanksprog.match(lines[i]) |
122 | | - if n < 0: n = mailheaderprog.match(lines[i]) |
123 | | - if n >= 0: |
124 | | - if versionprog.match(lines[i]) > 0: |
125 | | - version = string.split(lines[i])[1] |
126 | | - if doingpre == 0: |
127 | | - lines[i] = '<PRE>\n' + lines[i] |
128 | | - doingpre = 1 |
129 | | - continue |
130 | | - n = blankprog.match(lines[i]) |
131 | | - if n >= 0: |
132 | | - # print '*** ', lines[i-1], doingpre |
133 | | - if doingpre == 1: |
134 | | - lines[i] = '</PRE><P>\n' |
135 | | - doingpre = 0 |
136 | | - else: |
137 | | - lines[i] = '<P>\n' |
138 | | - continue |
139 | | - |
140 | | - # & -> & |
141 | | - n = ampprog.search(lines[i]) |
142 | | - if n >= 0: |
143 | | - lines[i] = regsub.gsub(ampprog, '&', lines[i]) |
144 | | - # no continue - there might be other changes to the line... |
145 | | - |
146 | | - # zap all the 'Q.' and 'A.' leaders - what happened to the |
147 | | - # last couple? |
148 | | - n = qprog.search(lines[i]) |
149 | | - if n >= 0: |
150 | | - lines[i] = regsub.sub(qprog, '>', lines[i]) |
151 | | - # no continue - there might be other changes to the line... |
152 | | - |
153 | | - n = aprog.search(lines[i]) |
154 | | - if n >= 0: |
155 | | - lines[i] = regsub.sub(aprog, '', lines[i]) |
156 | | - # no continue - there might be other changes to the line... |
157 | | - |
158 | | - # patch up hard refs to questions |
159 | | - n = qrefprog.search(lines[i]) |
160 | | - if n >= 0: |
161 | | - lines[i] = regsub.sub(qrefprog, |
162 | | - '<A HREF="#\\1">question \\1</A>', lines[i]) |
163 | | - # no continue - there might be other changes to the line... |
164 | | - |
165 | | - # make <URL:...> into actual links |
166 | | - n = urlprog.search(lines[i]) |
167 | | - if n >= 0: |
168 | | - lines[i] = regsub.gsub(urlprog, '<A HREF="\\1">\\1</A>', lines[i]) |
169 | | - # no continue - there might be other changes to the line... |
170 | | - |
171 | | - # make <[email protected]> into <mailto:...> links |
172 | | - n = emailprog.search(lines[i]) |
173 | | - if n >= 0: |
174 | | - lines[i] = regsub.gsub(emailprog, |
175 | | - '<A HREF="mailto:\\1">\\1</A>', lines[i]) |
176 | | - # no continue - there might be other changes to the line... |
177 | | - |
178 | | - lines[0:0] = ['<HTML><HEAD><TITLE>Python Frequently Asked Questions v', |
179 | | - version, |
180 | | - '</TITLE>\n', |
181 | | - '</HEAD><body>\n', |
182 | | - '(This file was generated using\n', |
183 | | - '<A HREF="faq2html.py">faq2html.py</A>.)<P>\n'] |
184 | | - lines.append('<P></BODY></HTML>\n') |
185 | | - |
186 | | - print 'Writing html file...' |
187 | | - f = open(FAQ + '.html', 'w') |
188 | | - for line in lines: |
189 | | - f.write(line) |
190 | | - f.close() |
191 | | - print 'Done.' |
192 | | - |
193 | | -main() |
| 1 | +#!/usr/local/bin/python |
| 2 | + |
| 3 | +# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96) |
| 4 | + |
| 5 | +# Reads a text file given on standard input or named as first argument, and |
| 6 | +# generates HTML 2.0 on standard output. Recognizes these constructions: |
| 7 | +# |
| 8 | +# HTML element pattern at the beginning of a line |
| 9 | +# |
| 10 | +# section heading (<number><period>)+<space> |
| 11 | +# numbered list element <1-2 spaces>(<number><period>)+<space> |
| 12 | +# unnumbered list element <0-2 spaces><hyphen or asterisk><space> |
| 13 | +# preformatted section <more than two spaces> |
| 14 | +# |
| 15 | +# Heading level is determined by the number of (<number><period>) segments. |
| 16 | +# Blank lines force a separation of elements; if none of the above four |
| 17 | +# types is indicated, a new paragraph begins. A line beginning with many |
| 18 | +# spaces is interpreted as a continuation (instead of preformatted) after |
| 19 | +# a list element. Headings are anchored; paragraphs starting with "Q." are |
| 20 | +# emphasized, and those marked with "A." get their first sentence emphasized. |
| 21 | +# |
| 22 | +# Hyperlinks are created from references to: |
| 23 | +# URLs, explicitly marked using <URL:scheme://host...> |
| 24 | +# other questions, of the form "question <number>(<period><number>)*" |
| 25 | +# sections, of the form "section <number>". |
| 26 | + |
| 27 | +import sys, string, regex, regsub, regex_syntax |
| 28 | +regex.set_syntax(regex_syntax.RE_SYNTAX_AWK) |
| 29 | + |
| 30 | +# --------------------------------------------------------- regular expressions |
# Line classifiers (AWK syntax: plain parentheses group, '|' alternates).
# match() returns the length of the match, or -1 when there is none.

# One or two leading spaces, then "<n>." segments: a numbered list item.
# At least one space is required; with none, the line is a section heading
# and must be left for headingprog, which is tried later.
orditemprog = regex.compile('  ?([1-9][0-9]*\.)+ +')
# Up to two leading spaces, then '-' or '*': an unnumbered list item.
itemprog = regex.compile(' ? ?[-*] +')
# "<n>." segments at the very start of the line: a section heading.
headingprog = regex.compile('([1-9][0-9]*\.)+ +')
# More than two leading spaces: preformatted text.
prefmtprog = regex.compile('   ')
# A line that is a single whitespace character (e.g. a bare newline).
blankprog = regex.compile('^[ \t\r\n]$')
questionprog = regex.compile(' *Q\. +')
answerprog = regex.compile(' *A\. +')
# The first sentence of a paragraph, up to its closing punctuation.
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)
# These two are applied to the finished body, where spew() has already
# turned '<' and '>' into entities, so they look for the escaped brackets.
urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
# Characters that must be written as entities in HTML text.
entityprog = regex.compile('[&<>]')

# ------------------------------------------------------------ global variables
body = []                   # HTML fragments produced so far
ollev = ullev = 0           # <ol> nesting depth; 1 while a <ul> is open
element = content = secnum = version = ''   # element being accumulated

# ----------------------------------------------------- for making nested lists
def dnol():
    """Open one more level of ordered list.

    A '</li>' that was just emitted is taken back so that the new <ol>
    nests inside that item; upol() supplies the matching '</li>' later.
    """
    global body, ollev
    ollev = ollev + 1
    # body is still empty when the input starts with a list item
    if body and body[-1] == '</li>': del body[-1]
    body.append('<ol>')

def upol():
    """Close the innermost ordered list.

    When an outer list is still open, the list item that contained the
    nested list is closed as well.
    """
    global body, ollev
    ollev = ollev - 1
    body.append(ollev and '</ol></li>' or '</ol>')

# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    """Append the pending element to body, then optionally close lists.

    If any content has been accumulated it is entity-escaped, decorated
    (questions are emphasized and, inside an ordered list, linked to their
    section; headings get a named anchor; answers get their first sentence
    in <strong>) and written out wrapped in the current element's tags.

    clearol -- if true, close every open ordered list afterwards
    clearul -- if true, close the open unordered list afterwards
    """
    global content, body, ollev, ullev

    if content:
        if entityprog.search(content) > -1:
            # '&' must go first, or the entities added for '<' and '>'
            # would be escaped a second time
            content = regsub.gsub('&', '&amp;', content)
            content = regsub.gsub('<', '&lt;', content)
            content = regsub.gsub('>', '&gt;', content)

        n = questionprog.match(content)
        if n > 0:
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                fragid = regsub.gsub('^ +|\.? +$', '', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':               # heading in the main text
            fragid = regsub.gsub('^ +|\.? +$', '', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        n = answerprog.match(content)
        if n > 0:                           # answer paragraph
            content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])

        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    while clearol and ollev: upol()
    if clearul and ullev: body.append('</ul>'); ullev = 0
| 96 | + |
| 97 | +# ---------------------------------------------------------------- main program |
| 98 | +faq = len(sys.argv)>1 and sys.argv[1] and open(sys.argv[1]) or sys.stdin |
| 99 | +lines = faq.readlines() |
| 100 | + |
| 101 | +for line in lines: |
| 102 | + if line[2:9] == '=======': # <hr> will appear *before* |
| 103 | + body.append('<hr>') # the underlined heading |
| 104 | + continue |
| 105 | + |
| 106 | + n = orditemprog.match(line) |
| 107 | + if n > 0: # make ordered list item |
| 108 | + spew(0, 'clear ul') |
| 109 | + secnum = line[:n] |
| 110 | + level = string.count(secnum, '.') |
| 111 | + while level > ollev: dnol() |
| 112 | + while level < ollev: upol() |
| 113 | + element, content = 'li', line[n:] |
| 114 | + continue |
| 115 | + |
| 116 | + n = itemprog.match(line) |
| 117 | + if n > 0: # make unordered list item |
| 118 | + spew('clear ol', 0) |
| 119 | + if ullev == 0: body.append('<ul>'); ullev = 1 |
| 120 | + element, content = 'li', line[n:] |
| 121 | + continue |
| 122 | + |
| 123 | + n = headingprog.match(line) |
| 124 | + if n > 0: # make heading element |
| 125 | + spew('clear ol', 'clear ul') |
| 126 | + secnum = line[:n] |
| 127 | + sys.stderr.write(line) |
| 128 | + element, content = 'h%d' % string.count(secnum, '.'), line[n:] |
| 129 | + continue |
| 130 | + |
| 131 | + n = 0 |
| 132 | + if not secnum: # haven't hit body yet |
| 133 | + n = mailhdrprog.match(line) |
| 134 | + v = version and -1 or regex.match('Version: ', line) |
| 135 | + if v > 0 and not version: version = line[v:] |
| 136 | + if n <= 0 and element != 'li': # not pre if after a list item |
| 137 | + n = prefmtprog.match(line) |
| 138 | + if n > 0: # make preformatted element |
| 139 | + if element == 'pre': |
| 140 | + content = content + line |
| 141 | + else: |
| 142 | + spew('clear ol', 'clear ul') |
| 143 | + element, content = 'pre', line |
| 144 | + continue |
| 145 | + |
| 146 | + if blankprog.match(line) > 0: # force a new element |
| 147 | + spew() |
| 148 | + element = '' |
| 149 | + elif element: # continue current element |
| 150 | + content = content + line |
| 151 | + else: # no element; make paragraph |
| 152 | + spew('clear ol', 'clear ul') |
| 153 | + element, content = 'p', line |
| 154 | + |
| 155 | +spew() # output last element |
| 156 | + |
| 157 | +body = string.joinfields(body, '') |
| 158 | +body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body) |
| 159 | +body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body) |
| 160 | +body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body) |
| 161 | +body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body) |
| 162 | + |
| 163 | +print '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>' |
| 164 | +print '<head><title>Python Frequently-Asked Questions v' + version |
| 165 | +print "</title></head><body>(This file was generated using Ping's" |
| 166 | +print '<a href="faq2html.py">faq2html.py</a>.)' |
| 167 | +print body + '</body></html>' |
0 commit comments