1919# Many thanks for regular expression debugging & authoring are due to:
2020# Tim (the-incredib-ly y'rs) Peters and Christian Tismer
2121# So, who owns the copyright? ;-) How about this:
22- # Copyright 1996-2000 :
22+ # Copyright 1996-2001 :
2323# Mitchell S. Chapman,
2424# Zachary Roadhouse,
2525# Tim Peters,
2626# Just van Rossum
2727
28- __version__ = "0.3.3 "
28+ __version__ = "0.4 "
2929
30- import string , re
30+ import string
31+ import re
3132
3233# First a little helper, since I don't like to repeat things. (Tismer speaking)
3334import string
@@ -43,50 +44,47 @@ def replace(where, what, with):
4344 "break" , "else" , "if" , "or" , "while" ,
4445 "class" , "except" , "import" , "pass" ,
4546 "continue" , "finally" , "in" , "print" ,
46- "def" , "for" , "is" , "raise" ]
47+ "def" , "for" , "is" , "raise" , "yield" ]
4748
4849# Build up a regular expression which will match anything
4950# interesting, including multi-line triple-quoted strings.
50- commentPat = "#. *"
51+ commentPat = r"#[^\n] *"
5152
52- pat = "q[^\q\n ]*\ (\\ \\ [\000 -\377 ][^\q\n ]*\ )*q"
53- quotePat = replace (pat , "q" , "'" ) + "\ |" + replace (pat , 'q' , '"' )
53+ pat = r "q[^\\ q\n]*(\\[\000-\377][^\\ q\n]*)*q"
54+ quotePat = replace (pat , "q" , "'" ) + "|" + replace (pat , 'q' , '"' )
5455
5556# Way to go, Tim!
56- pat = """
57+ pat = r """
5758 qqq
5859 [^\\q]*
59- \ (
60- \( \\ \\ [\000 -\377 ]
61- \ | q
62- \( \\ \\ [\000 -\377 ]
63- \ | [^\ \ q]
64- \ | q
65- \( \\ \\ [\000 -\377 ]
66- \ | [^\\ q]
67- \ )
68- \ )
69- \ )
60+ (
61+ ( \\[\000-\377]
62+ | q
63+ ( \\[\000-\377]
64+ | [^\q]
65+ | q
66+ ( \\[\000-\377]
67+ | [^\\q]
68+ )
69+ )
70+ )
7071 [^\\q]*
71- \ )*
72+ )*
7273 qqq
7374"""
7475pat = string .join (string .split (pat ), '' ) # get rid of whitespace
75- tripleQuotePat = replace (pat , "q" , "'" ) + "\ |" + replace (pat , 'q' , '"' )
76+ tripleQuotePat = replace (pat , "q" , "'" ) + "|" + replace (pat , 'q' , '"' )
7677
7778# Build up a regular expression which matches all and only
7879# Python keywords. This will let us skip the uninteresting
7980# identifier references.
8081# nonKeyPat identifies characters which may legally precede
8182# a keyword pattern.
82- nonKeyPat = "\(^\ |[^a-zA-Z0-9_.\" ']\ )"
83+ nonKeyPat = r"(^ |[^a-zA-Z0-9_.\"'])"
8384
84- keyPat = nonKeyPat + "\("
85- for keyword in keywordsList :
86- keyPat = keyPat + keyword + "\|"
87- keyPat = keyPat [:- 2 ] + "\)" + nonKeyPat
85+ keyPat = nonKeyPat + "(" + "|" .join (keywordsList ) + ")" + nonKeyPat
8886
89- matchPat = commentPat + "\ |" + keyPat + "\ |" + tripleQuotePat + "\ |" + quotePat
87+ matchPat = commentPat + "|" + keyPat + "|" + tripleQuotePat + "|" + quotePat
9088matchRE = re .compile (matchPat )
9189
9290idKeyPat = "[ \t ]*[A-Za-z_][A-Za-z_0-9.]*" # Ident w. leading whitespace.
@@ -111,7 +109,10 @@ def fontify(pytext, searchfrom = 0, searchto = None):
111109 end = searchfrom
112110 while 1 :
113111 m = search (pytext , end )
114- if not m or m .start () >= searchto :
112+ if m is None :
113+ break # EXIT LOOP
114+ start = m .start ()
115+ if start >= searchto :
115116 break # EXIT LOOP
116117 match = m .group (0 )
117118 end = start + len (match )
@@ -132,10 +133,12 @@ def fontify(pytext, searchfrom = 0, searchto = None):
132133 # following identifier.
133134 if match in ["def" , "class" ]:
134135 m = idSearch (pytext , end )
135- if m and m .start () == end :
136- match = m .group (0 )
137- end = start + len (match )
138- tags_append ((identifierTag , start , end , None ))
136+ if m is not None :
137+ start = m .start ()
138+ if start == end :
139+ match = m .group (0 )
140+ end = start + len (match )
141+ tags_append ((identifierTag , start , end , None ))
139142 elif c == "#" :
140143 tags_append ((commentTag , start , end , None ))
141144 else :
0 commit comments