11#!/usr/bin/env python3
2- '''Add syntax highlighting to Python source code
2+ '''Add syntax highlighting to Python source code'''
33
4- Example command-line calls:
4+ __all__ = ['analyze_python' , 'ansi_highlight' , 'default_ansi' ,
5+ 'html_highlight' , 'build_html_page' , 'default_css' , 'default_html' ]
56
6- # Show syntax highlighted code in the terminal window
7- $ ./highlight.py myfile.py
8-
9- # Colorize myfile.py and display in a browser
10- $ ./highlight.py -b myfile.py
11-
12- # Create an HTML section that can be embedded in an existing webpage
13- ./highlight.py -s myfile.py
14-
15- # Create a complete HTML file
16- $ ./highlight.py -c myfile.py > myfile.html
17-
18- '''
19-
20- __all__ = ['colorize_html' , 'build_page' , 'default_css' , 'default_html' ,
21- 'colorize_ansi' , 'default_ansi' ]
227__author__ = 'Raymond Hettinger'
238
249import keyword , tokenize , cgi , functools
@@ -31,13 +16,16 @@ def combine_range(lines, start, end):
3116 'Join content from a range of lines between start and end'
3217 (srow , scol ), (erow , ecol ) = start , end
3318 if srow == erow :
34- rows = [lines [srow - 1 ][scol :ecol ]]
35- else :
36- rows = [lines [srow - 1 ][scol :]] + lines [srow : erow - 1 ] + [lines [erow - 1 ][:ecol ]]
19+ return lines [srow - 1 ][scol :ecol ], end
20+ rows = [lines [srow - 1 ][scol :]] + lines [srow : erow - 1 ] + [lines [erow - 1 ][:ecol ]]
3721 return '' .join (rows ), end
3822
39- def isolate_tokens (source ):
40- 'Generate chunks of source and identify chunks to be highlighted'
23+ def analyze_python (source ):
24+ '''Generate and classify chunks of Python for syntax highlighting.
25+ Yields tuples in the form: (leadin_text, category, categorized_text).
26+ The final tuple has empty strings for the category and categorized text.
27+
28+ '''
4129 lines = source .splitlines (True )
4230 lines .append ('' )
4331 readline = functools .partial (next , iter (lines ), '' )
@@ -65,36 +53,37 @@ def isolate_tokens(source):
6553 kind = 'keyword'
6654 elif is_builtin (tok_str ) and prev_tok_str != '.' :
6755 kind = 'builtin'
68- line_upto_token , written = combine_range (lines , written , (srow , scol ))
69- line_thru_token , written = combine_range (lines , written , (erow , ecol ))
70- yield kind , line_upto_token , line_thru_token
56+ if kind :
57+ line_upto_token , written = combine_range (lines , written , (srow , scol ))
58+ line_thru_token , written = combine_range (lines , written , (erow , ecol ))
59+ yield line_upto_token , kind , line_thru_token
60+ line_upto_token , written = combine_range (lines , written , (erow , ecol ))
61+ yield line_upto_token , '' , ''
7162
7263default_ansi = {
73- 'comment' : '\033 [0;31m' ,
74- 'string' : '\033 [0;32m' ,
75- 'docstring' : '\033 [0;32m' ,
76- 'keyword' : '\033 [0;33m' ,
77- 'builtin' : '\033 [0;35m' ,
78- 'definition' : '\033 [0;33m' ,
79- 'defname' : '\033 [0;34m' ,
80- 'operator' : '\033 [0;33m' ,
64+ 'comment' : ( '\033 [0;31m' , ' \033 [0m' ) ,
65+ 'string' : ( '\033 [0;32m' , ' \033 [0m' ) ,
66+ 'docstring' : ( '\033 [0;32m' , ' \033 [0m' ) ,
67+ 'keyword' : ( '\033 [0;33m' , ' \033 [0m' ) ,
68+ 'builtin' : ( '\033 [0;35m' , ' \033 [0m' ) ,
69+ 'definition' : ( '\033 [0;33m' , ' \033 [0m' ) ,
70+ 'defname' : ( '\033 [0;34m' , ' \033 [0m' ) ,
71+ 'operator' : ( '\033 [0;33m' , ' \033 [0m' ) ,
8172}
8273
83- def colorize_ansi ( source , colors = default_ansi ):
84- 'Add syntax highlighting to Python source code using ANSI escape sequences'
74+ def ansi_highlight ( classified_text , colors = default_ansi ):
75+ 'Add syntax highlighting to source code using ANSI escape sequences'
8576 # http://en.wikipedia.org/wiki/ANSI_escape_code
8677 result = []
87- for kind , line_upto_token , line_thru_token in isolate_tokens (source ):
88- if kind :
89- result += [line_upto_token , colors [kind ], line_thru_token , '\033 [0m' ]
90- else :
91- result += [line_upto_token , line_thru_token ]
78+ for line_upto_token , kind , line_thru_token in classified_text :
79+ opener , closer = colors .get (kind , ('' , '' ))
80+ result += [line_upto_token , opener , line_thru_token , closer ]
9281 return '' .join (result )
9382
94- def colorize_html ( source ):
95- 'Convert Python source code to an HTML fragment with colorized markup '
96- result = ['<pre class="python"> \n ' ]
97- for kind , line_upto_token , line_thru_token in isolate_tokens ( source ) :
83+ def html_highlight ( classified_text , opener = '<pre class="python"> \n ' , closer = '</pre> \n ' ):
84+ 'Convert classified text to an HTML fragment'
85+ result = [opener ]
86+ for line_upto_token , kind , line_thru_token in classified_text :
9887 if kind :
9988 result += [cgi .escape (line_upto_token ),
10089 '<span class="%s">' % kind ,
@@ -103,7 +92,7 @@ def colorize_html(source):
10392 else :
10493 result += [cgi .escape (line_upto_token ),
10594 cgi .escape (line_thru_token )]
106- result += ['</pre> \n ' ]
95+ result += [closer ]
10796 return '' .join (result )
10897
10998default_css = {
@@ -134,21 +123,38 @@ def colorize_html(source):
134123</html>
135124'''
136125
137- def build_page (source , title = 'python' , css = default_css , html = default_html ):
138- 'Create a complete HTML page with colorized Python source code'
126+ def build_html_page (classified_text , title = 'python' ,
127+ css = default_css , html = default_html ):
128+ 'Create a complete HTML page with colorized source code'
139129 css_str = '\n ' .join (['%s %s' % item for item in css .items ()])
140- result = colorize_html ( source )
130+ result = html_highlight ( classified_text )
141131 title = cgi .escape (title )
142132 return html .format (title = title , css = css_str , body = result )
143133
144134
145135if __name__ == '__main__' :
146- import sys , argparse , webbrowser , os
136+ import sys , argparse , webbrowser , os , textwrap
147137
148138 parser = argparse .ArgumentParser (
149- description = 'Add syntax highlighting to Python source' )
139+ description = 'Add syntax highlighting to Python source code' ,
140+ formatter_class = argparse .RawDescriptionHelpFormatter ,
141+ epilog = textwrap .dedent ('''
142+ examples:
143+
144+ # Show syntax highlighted code in the terminal window
145+ $ ./highlight.py myfile.py
146+
147+ # Colorize myfile.py and display in a browser
148+ $ ./highlight.py -b myfile.py
149+
150+ # Create an HTML section to embed in an existing webpage
151+ ./highlight.py -s myfile.py
152+
153+ # Create a complete HTML file
154+ $ ./highlight.py -c myfile.py > myfile.html
155+ ''' ))
150156 parser .add_argument ('sourcefile' , metavar = 'SOURCEFILE' ,
151- help = 'File containing Python sourcecode' )
157+ help = 'file containing Python sourcecode' )
152158 parser .add_argument ('-b' , '--browser' , action = 'store_true' ,
153159 help = 'launch a browser to show results' )
154160 parser .add_argument ('-c' , '--complete' , action = 'store_true' ,
@@ -164,13 +170,14 @@ def build_page(source, title='python', css=default_css, html=default_html):
164170 sourcefile = args .sourcefile
165171 with open (sourcefile ) as f :
166172 source = f .read ()
173+ classified_text = analyze_python (source )
167174
168175 if args .complete or args .browser :
169- encoded = build_page ( source , title = sourcefile )
176+ encoded = build_html_page ( classified_text , title = sourcefile )
170177 elif args .section :
171- encoded = colorize_html ( source )
178+ encoded = html_highlight ( classified_text )
172179 else :
173- encoded = colorize_ansi ( source )
180+ encoded = ansi_highlight ( classified_text )
174181
175182 if args .browser :
176183 htmlfile = os .path .splitext (os .path .basename (sourcefile ))[0 ] + '.html'
0 commit comments