11#!/usr/bin/env python3
22'''Add syntax highlighting to Python source code'''
33
4- __all__ = ['analyze_python' , 'ansi_highlight' , 'default_ansi' ,
5- 'html_highlight' , 'build_html_page' , 'default_css' , 'default_html' ]
6-
74__author__ = 'Raymond Hettinger'
85
9- import keyword , tokenize , cgi , functools
6+ import keyword , tokenize , cgi , re , functools
7+
8+ #### Analyze Python Source #################################
109
1110def is_builtin (s ):
1211 'Return True if s is the name of a builtin'
@@ -60,6 +59,20 @@ def analyze_python(source):
6059 line_upto_token , written = combine_range (lines , written , (erow , ecol ))
6160 yield line_upto_token , '' , ''
6261
62+ #### Raw Output ###########################################
63+
def raw_highlight(classified_text):
    '''Return a plain-text dump of the classified text, one record per line.

    classified_text is an iterable of (line_upto_token, kind, line_thru_token)
    triples.  For each triple, a non-empty line_upto_token is reported under
    the label 'plain' and a non-empty line_thru_token under its kind.  Empty
    parts are skipped.  Returns the concatenated report as a single string.
    '''
    result = []
    for line_upto_token, kind, line_thru_token in classified_text:
        if line_upto_token:
            # Same 15-wide right-aligned label column as the classified rows,
            # so the repr() values line up vertically.
            result.append('%15s: %r\n' % ('plain', line_upto_token))
        if line_thru_token:
            # Each record ends exactly at the newline; nothing may follow it
            # or the next record would start with stray indentation.
            result.append('%15s: %r\n' % (kind, line_thru_token))
    return ''.join(result)
73+
74+ #### ANSI Output ###########################################
75+
6376default_ansi = {
6477 'comment' : ('\033 [0;31m' , '\033 [0m' ),
6578 'string' : ('\033 [0;32m' , '\033 [0m' ),
@@ -80,6 +93,8 @@ def ansi_highlight(classified_text, colors=default_ansi):
8093 result += [line_upto_token , opener , line_thru_token , closer ]
8194 return '' .join (result )
8295
96+ #### HTML Output ###########################################
97+
8398def html_highlight (classified_text ,opener = '<pre class="python">\n ' , closer = '</pre>\n ' ):
8499 'Convert classified text to an HTML fragment'
85100 result = [opener ]
@@ -131,6 +146,59 @@ def build_html_page(classified_text, title='python',
131146 title = cgi .escape (title )
132147 return html .format (title = title , css = css_str , body = result )
133148
149+ #### LaTeX Output ##########################################
150+
# Color name used for each token classification when emitting LaTeX.
default_latex_colors = dict(
    comment='red',
    string='green',
    docstring='green',
    keyword='orange',
    builtin='purple',
    definition='orange',
    defname='blue',
    operator='brown',
)
161+
# Skeleton of a complete LaTeX document.  It is a %-format template with two
# named fields: %(title)s, typeset as a heading, and %(body)s, placed inside
# an alltt environment.  Because it is used with the % operator, any literal
# percent sign added to this template must be written as %%.
default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''
175+
# Translation table built once at import time: each key is a character that
# LaTeX treats specially, each value is its escaped spelling.
# http://en.wikibooks.org/wiki/LaTeX/Basics#Special_Characters
_LATEX_SPECIALS = str.maketrans({
    '#': r'\#', '$': r'\$', '%': r'\%', '^': r'\textasciicircum{}',
    '&': r'\&', '_': r'\_', '{': r'\{', '}': r'\}', '~': r'\~{}',
    '\\': r'\textbackslash{}',
})

def latex_escape(s):
    '''Replace LaTeX special characters with their escaped equivalents.

    s is the text to escape.  Returns a new string in which each of
    # $ % ^ & _ { } ~ and backslash is replaced by its escaped form; all
    other characters are copied unchanged.  The replacement is a single
    pass, so the braces and backslashes introduced by a replacement are
    never escaped a second time.
    '''
    return s.translate(_LATEX_SPECIALS)
185+
def latex_highlight(classified_text, title='python',
                    colors=default_latex_colors,
                    document=default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

    classified_text is an iterable of (line_upto_token, kind, line_thru_token)
    triples.  title is the heading text; it is LaTeX-escaped before being
    inserted.  colors maps each kind to a LaTeX color name.  document is a
    %-format template with %(title)s and %(body)s fields.

    Returns the filled-in template as a string.  Raises KeyError if a
    non-empty kind is missing from colors.
    '''
    result = []
    for line_upto_token, kind, line_thru_token in classified_text:
        # Text leading up to the token is always emitted uncolored.
        result.append(latex_escape(line_upto_token))
        token = latex_escape(line_thru_token)
        if kind:
            # Scope the color change to this token with a brace group.
            token = r'{\color{%s}%s}' % (colors[kind], token)
        result.append(token)
    # Fill the template the caller passed in, not the module default, and
    # escape the title so names such as my_file.py remain valid LaTeX.
    return document % dict(title=latex_escape(title), body=''.join(result))
201+
134202
135203if __name__ == '__main__' :
136204 import sys , argparse , webbrowser , os , textwrap
@@ -152,17 +220,23 @@ def build_html_page(classified_text, title='python',
152220
153221 # Create a complete HTML file
154222 $ ./highlight.py -c myfile.py > myfile.html
223+
224+ # Create a PDF using LaTeX
225+ $ ./highlight.py -l myfile.py | pdflatex
226+
155227 ''' ))
156228 parser .add_argument ('sourcefile' , metavar = 'SOURCEFILE' ,
157229 help = 'file containing Python sourcecode' )
158230 parser .add_argument ('-b' , '--browser' , action = 'store_true' ,
159231 help = 'launch a browser to show results' )
160232 parser .add_argument ('-c' , '--complete' , action = 'store_true' ,
161233 help = 'build a complete html webpage' )
234+ parser .add_argument ('-l' , '--latex' , action = 'store_true' ,
235+ help = 'build a LaTeX document' )
236+ parser .add_argument ('-r' , '--raw' , action = 'store_true' ,
237+ help = 'raw parse of categorized text' )
162238 parser .add_argument ('-s' , '--section' , action = 'store_true' ,
163239 help = 'show an HTML section rather than a complete webpage' )
164- parser .add_argument ('-v' , '--verbose' , action = 'store_true' ,
165- help = 'display categorized text to stderr' )
166240 args = parser .parse_args ()
167241
168242 if args .section and (args .browser or args .complete ):
@@ -174,16 +248,14 @@ def build_html_page(classified_text, title='python',
174248 source = f .read ()
175249 classified_text = analyze_python (source )
176250
177- if args .verbose :
178- classified_text = list (classified_text )
179- for line_upto_token , kind , line_thru_token in classified_text :
180- sys .stderr .write ('%15s: %r\n ' % ('leadin' , line_upto_token ))
181- sys .stderr .write ('%15s: %r\n \n ' % (kind , line_thru_token ))
182-
183- if args .complete or args .browser :
251+ if args .raw :
252+ encoded = raw_highlight (classified_text )
253+ elif args .complete or args .browser :
184254 encoded = build_html_page (classified_text , title = sourcefile )
185255 elif args .section :
186256 encoded = html_highlight (classified_text )
257+ elif args .latex :
258+ encoded = latex_highlight (classified_text , title = sourcefile )
187259 else :
188260 encoded = ansi_highlight (classified_text )
189261
0 commit comments