Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 694f701

Browse files
committed
Rewritten by Ka-Ping Yee.
1 parent ba885ff commit 694f701

1 file changed

Lines changed: 167 additions & 193 deletions

File tree

Misc/faq2html.py

Lines changed: 167 additions & 193 deletions
Original file line numberDiff line numberDiff line change
@@ -1,193 +1,167 @@
1-
#! /usr/local/bin/python
2-
3-
# Convert the Python FAQ to HTML
4-
5-
import string
6-
import regex
7-
import regsub
8-
import sys
9-
import os
10-
11-
FAQ = 'FAQ'
12-
13-
chapterprog = regex.compile('^\([1-9][0-9]*\)\. ')
14-
questionprog = regex.compile('^\([1-9][0-9]*\)\.\([1-9][0-9]*\)\. ')
15-
newquestionprog = regex.compile('^Q\. ')
16-
blankprog = regex.compile('^[ \t]*$')
17-
indentedorblankprog = regex.compile('^\([ \t]+\|[ \t]*$\)')
18-
underlineprog = regex.compile('^==*$')
19-
eightblanksprog = regex.compile('^\( \| *\t\)')
20-
mailheaderprog = regex.compile('^\(Subject\|Newsgroups\|Followup-To\|From\|Reply-To\|Approved\|Archive-name\|Version\|Last-modified\): +')
21-
urlprog = regex.compile('<URL:\([^>]*\)>')
22-
ampprog = regex.compile('&')
23-
aprog = regex.compile('^A\. +')
24-
qprog = regex.compile('>Q\. +')
25-
qrefprog = regex.compile('question +\([0-9]\.[0-9]+\)')
26-
versionprog = regex.compile('^Version: ')
27-
emailprog = regex.compile('<\([^>@:]+@[^>@:]+\)>')
28-
29-
def main():
30-
print 'Reading lines...'
31-
lines = open(FAQ, 'r').readlines()
32-
print 'Renumbering in memory...'
33-
oldlines = lines[:]
34-
after_blank = 1
35-
chapter = 0
36-
question = 0
37-
chapters = ['<OL>']
38-
questions = ['<OL>']
39-
for i in range(len(lines)):
40-
line = lines[i]
41-
if after_blank:
42-
n = chapterprog.match(line)
43-
if n >= 0:
44-
chapter = chapter + 1
45-
if chapter != 1:
46-
questions.append('</UL>\n')
47-
question = 0
48-
lines[i] = '<H2>' + line[n:-1] + '</H2>\n'
49-
chapters.append('<LI> ' + line[n:])
50-
questions.append('<LI> ' + line[n:])
51-
questions.append('<UL>\n')
52-
afterblank = 0
53-
continue
54-
n = underlineprog.match(line)
55-
if n >= 0:
56-
lines[i] = ''
57-
continue
58-
n = questionprog.match(line)
59-
if n < 0: n = newquestionprog.match(line) - 3
60-
if n >= 0:
61-
question = question + 1
62-
number = '%d.%d'%(chapter, question)
63-
lines[i] = '<A NAME="' + number + '"><H3>' + line[n:]
64-
questions.append('<LI><A HREF="#' + \
65-
number + '">' + line[n:])
66-
# Add up to 4 continuations of the question
67-
n = len(number)
68-
for j in range(i+1, i+5):
69-
if blankprog.match(lines[j]) >= 0:
70-
lines[j-1] = lines[j-1] + '</H3></A>'
71-
questions[-1] = \
72-
questions[-1][:-1] + '</A>\n'
73-
break
74-
questions.append(' '*(n+2) + lines[j])
75-
afterblank = 0
76-
continue
77-
afterblank = (blankprog.match(line) >= 0)
78-
print 'Inserting list of chapters...'
79-
chapters.append('</OL>\n')
80-
for i in range(len(lines)):
81-
line = lines[i]
82-
if regex.match(
83-
'^This FAQ is divided in the following chapters',
84-
line) >= 0:
85-
i = i+1
86-
while 1:
87-
line = lines[i]
88-
if indentedorblankprog.match(line) < 0:
89-
break
90-
del lines[i]
91-
lines[i:i] = chapters
92-
break
93-
else:
94-
print '*** Can\'t find header for list of chapters'
95-
print '*** Chapters found:'
96-
for line in chapters: print line,
97-
print 'Inserting list of questions...'
98-
questions.append('</UL></OL>\n')
99-
for i in range(len(lines)):
100-
line = lines[i]
101-
if regex.match('^Here.s an overview of the questions',
102-
line) >= 0:
103-
i = i+1
104-
while 1:
105-
line = lines[i]
106-
if indentedorblankprog.match(line) < 0:
107-
break
108-
del lines[i]
109-
lines[i:i] = questions
110-
break
111-
else:
112-
print '*** Can\'t find header for list of questions'
113-
print '*** Questions found:'
114-
for line in questions: print line,
115-
# final cleanup
116-
print "Final cleanup..."
117-
doingpre = 0
118-
for i in range(len(lines)):
119-
# set lines indented by >= 8 spaces using PRE
120-
# blank lines either terminate PRE or separate paragraphs
121-
n = eightblanksprog.match(lines[i])
122-
if n < 0: n = mailheaderprog.match(lines[i])
123-
if n >= 0:
124-
if versionprog.match(lines[i]) > 0:
125-
version = string.split(lines[i])[1]
126-
if doingpre == 0:
127-
lines[i] = '<PRE>\n' + lines[i]
128-
doingpre = 1
129-
continue
130-
n = blankprog.match(lines[i])
131-
if n >= 0:
132-
# print '*** ', lines[i-1], doingpre
133-
if doingpre == 1:
134-
lines[i] = '</PRE><P>\n'
135-
doingpre = 0
136-
else:
137-
lines[i] = '<P>\n'
138-
continue
139-
140-
# & -> &amp;
141-
n = ampprog.search(lines[i])
142-
if n >= 0:
143-
lines[i] = regsub.gsub(ampprog, '&amp;', lines[i])
144-
# no continue - there might be other changes to the line...
145-
146-
# zap all the 'Q.' and 'A.' leaders - what happened to the
147-
# last couple?
148-
n = qprog.search(lines[i])
149-
if n >= 0:
150-
lines[i] = regsub.sub(qprog, '>', lines[i])
151-
# no continue - there might be other changes to the line...
152-
153-
n = aprog.search(lines[i])
154-
if n >= 0:
155-
lines[i] = regsub.sub(aprog, '', lines[i])
156-
# no continue - there might be other changes to the line...
157-
158-
# patch up hard refs to questions
159-
n = qrefprog.search(lines[i])
160-
if n >= 0:
161-
lines[i] = regsub.sub(qrefprog,
162-
'<A HREF="#\\1">question \\1</A>', lines[i])
163-
# no continue - there might be other changes to the line...
164-
165-
# make <URL:...> into actual links
166-
n = urlprog.search(lines[i])
167-
if n >= 0:
168-
lines[i] = regsub.gsub(urlprog, '<A HREF="\\1">\\1</A>', lines[i])
169-
# no continue - there might be other changes to the line...
170-
171-
# make <user@host> into <mailto:...> links
172-
n = emailprog.search(lines[i])
173-
if n >= 0:
174-
lines[i] = regsub.gsub(emailprog,
175-
'<A HREF="mailto:\\1">\\1</A>', lines[i])
176-
# no continue - there might be other changes to the line...
177-
178-
lines[0:0] = ['<HTML><HEAD><TITLE>Python Frequently Asked Questions v',
179-
version,
180-
'</TITLE>\n',
181-
'</HEAD><body>\n',
182-
'(This file was generated using\n',
183-
'<A HREF="faq2html.py">faq2html.py</A>.)<P>\n']
184-
lines.append('<P></BODY></HTML>\n')
185-
186-
print 'Writing html file...'
187-
f = open(FAQ + '.html', 'w')
188-
for line in lines:
189-
f.write(line)
190-
f.close()
191-
print 'Done.'
192-
193-
main()
1+
#!/usr/local/bin/python
2+
3+
# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
4+
5+
# Reads a text file given on standard input or named as first argument, and
6+
# generates HTML 2.0 on standard output. Recognizes these constructions:
7+
#
8+
# HTML element pattern at the beginning of a line
9+
#
10+
# section heading (<number><period>)+<space>
11+
# numbered list element <1-2 spaces>(<number><period>)+<space>
12+
# unnumbered list element <0-2 spaces><hyphen or asterisk><space>
13+
# preformatted section <more than two spaces>
14+
#
15+
# Heading level is determined by the number of (<number><period>) segments.
16+
# Blank lines force a separation of elements; if none of the above four
17+
# types is indicated, a new paragraph begins. A line beginning with many
18+
# spaces is interpreted as a continuation (instead of preformatted) after
19+
# a list element. Headings are anchored; paragraphs starting with "Q." are
20+
# emphasized, and those marked with "A." get their first sentence emphasized.
21+
#
22+
# Hyperlinks are created from references to:
23+
# URLs, explicitly marked using <URL:scheme://host...>
24+
# other questions, of the form "question <number>(<period><number>)*"
25+
# sections, of the form "section <number>".
26+
27+
import sys, string, regex, regsub, regex_syntax
28+
regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
29+
30+
# --------------------------------------------------------- regular expressions
31+
# Patterns are written in AWK syntax (selected by regex.set_syntax above):
# bare parentheses group and `|` alternates.

# Line classifiers -- each is tried with .match(), i.e. anchored at the
# first character of an input line.  The leading blanks are significant:
# a numbered line in column 0 is a heading, the same text indented by one
# or two spaces is a list item, and three or more spaces mean preformatted.
orditemprog = regex.compile('  ?([1-9][0-9]*\.)+ +')    # 1-2 spaces + "1.2. "
itemprog = regex.compile(' ? ?[-*] +')                  # 0-2 spaces + "- "/"* "
headingprog = regex.compile('([1-9][0-9]*\.)+ +')       # "1.2. " in column 0
prefmtprog = regex.compile('   ')                       # more than two spaces
blankprog = regex.compile('^[ \t\r\n]$')
questionprog = regex.compile(' *Q\. +')                 # "Q." paragraph marker
answerprog = regex.compile(' *A\. +')                   # "A." paragraph marker
# First sentence of an answer: everything up to the first .:;?! that is
# followed by whitespace (or up to an opening parenthesis).
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)

# Link patterns -- applied with regsub.gsub() to the finished body, after
# '<' and '>' in the text have already become '&lt;' and '&gt;'.
urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
# Neither half of the address may contain '&': otherwise a match could
# start at an earlier, unrelated '&lt;' and swallow the text in between.
addrprog = regex.compile('&lt;([^&>@:]+@[^&@:]+)&gt;')
qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
entityprog = regex.compile('[&<>]')                     # needs HTML escaping

# ------------------------------------------------------------ global variables
body = []                   # output HTML fragments, joined at the very end
ollev = ullev = 0           # <ol> nesting depth; 1 while a <ul> is open
# element: tag name of the element being accumulated ('' = none)
# content: text collected for that element so far
# secnum:  numbering prefix of the latest list item or heading
# version: text following the "Version: " mail header, once seen
element = content = secnum = version = ''
52+
53+
# ----------------------------------------------------- for making nested lists
54+
def dnol():
    """Open one more level of ordered-list nesting.

    Increments the global depth counter `ollev` and appends '<ol>' to
    `body`.  If the last fragment in `body` is a closing '</li>', it is
    dropped first so that the new list ends up inside that item; upol()
    emits the matching '</li>' when a nested list is closed again.
    """
    global body, ollev
    ollev = ollev + 1
    # body is still empty when the very first input line is a list item,
    # so check before peeking at the last fragment.
    if body and body[-1] == '</li>': del body[-1]
    body.append('<ol>')
59+
60+
def upol():
    """Close the innermost open ordered list.

    Decrements the global depth counter `ollev` and appends the closing
    markup to `body`: '</ol></li>' while an enclosing list is still open
    (the list being closed was nested inside one of its items), plain
    '</ol>' once the outermost list has been closed.
    """
    global body, ollev
    ollev = ollev - 1
    if ollev:
        body.append('</ol></li>')
    else:
        body.append('</ol>')
64+
65+
# --------------------------------- output one element and convert its contents
66+
def spew(clearol=0, clearul=0):
    """Emit the pending element, then optionally close open lists.

    If the global `content` is non-empty it is converted and appended to
    `body` wrapped in the tag named by the global `element`:

      - '&', '<' and '>' are replaced by their HTML entities;
      - a leading "Q." marker is removed and the text is emphasized; while
        an ordered list is open (`ollev` non-zero) the text additionally
        becomes a link to the anchor derived from `secnum`;
      - for heading elements ('h...') the text is prefixed with `secnum`
        and wrapped in a named anchor;
      - a leading "A." marker is removed and the first sentence (as
        matched by `sentprog`) is set in <strong>.

    `content` is then reset to ''.  Nothing is emitted when `content` is
    empty.

    clearol -- if true, close every open ordered list via upol()
    clearul -- if true, close the unordered list if one is open

    Callers pass strings such as 'clear ol' purely as readable true values.
    """
    global content, body, ollev, ullev

    if content:
        if entityprog.search(content) > -1:
            # '&' must go first, or the entities produced by the next two
            # substitutions would themselves be escaped.
            content = regsub.gsub('&', '&amp;', content)
            content = regsub.gsub('<', '&lt;', content)
            content = regsub.gsub('>', '&gt;', content)

        # Anchor name: secnum without leading blanks and without the
        # trailing period and blanks, e.g. ' 1.2. ' -> '1.2'.
        fragid = regsub.gsub('^ +|\.? +$', '', secnum)

        n = questionprog.match(content)
        if n > 0:
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':               # heading in the main text
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        n = answerprog.match(content)
        if n > 0:                           # answer paragraph
            content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])

        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    while clearol and ollev: upol()
    if clearul and ullev: body.append('</ul>'); ullev = 0
96+
97+
# ---------------------------------------------------------------- main program
98+
# Input is the file named by the first argument, or standard input when no
# (or an empty) argument is given.
if len(sys.argv) > 1 and sys.argv[1]:
    faq = open(sys.argv[1])
else:
    faq = sys.stdin
lines = faq.readlines()
if faq is not sys.stdin: faq.close()

# Classify each line.  Every branch that starts a new element first calls
# spew() to flush the element collected so far, then records the new tag
# in `element` and the first line of its text in `content`.
for line in lines:
    # A heading's underline arrives while the heading itself is still
    # pending in `content`, so the rule lands in the output first.
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    n = orditemprog.match(line)
    if n > 0:                               # make ordered list item
        spew(0, 'clear ul')
        secnum = line[:n]
        level = string.count(secnum, '.')   # one period per nesting level
        while level > ollev: dnol()
        while level < ollev: upol()
        element, content = 'li', line[n:]
        continue

    n = itemprog.match(line)
    if n > 0:                               # make unordered list item
        spew('clear ol', 0)
        if ullev == 0: body.append('<ul>'); ullev = 1
        element, content = 'li', line[n:]
        continue

    n = headingprog.match(line)
    if n > 0:                               # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:n]
        sys.stderr.write(line)              # echo headings as progress
        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
        continue

    n = 0
    if not secnum:                          # haven't hit body yet
        # Mail header lines are set as preformatted text; the first
        # "Version: " header also supplies the version for the title.
        n = mailhdrprog.match(line)
        if not version:
            v = regex.match('Version: ', line)
            if v > 0: version = string.strip(line[v:])
    if n <= 0 and element != 'li':          # not pre if after a list item
        n = prefmtprog.match(line)
    if n > 0:                               # make preformatted element
        if element == 'pre':
            content = content + line
        else:
            spew('clear ol', 'clear ul')
            element, content = 'pre', line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element, content = 'p', line

spew()                                      # output last element

# Hyperlinks are created on the joined text, so a reference broken across
# input lines within one element is still found.
body = string.joinfields(body, '')
body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)

out = sys.stdout.write
out('<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>' + '\n')
out('<head><title>Python Frequently-Asked Questions v' + version + '\n')
out("</title></head><body>(This file was generated using Ping's" + '\n')
out('<a href="faq2html.py">faq2html.py</a>.)' + '\n')
out(body + '</body></html>' + '\n')

0 commit comments

Comments
 (0)