Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f81e5b9

Browse files
committed
New module -- converts regex regular expressions to re style.
There are two ways to use this -- as a filter (e.g. using C-U M-| on a regex string literal in an Emacs buffer) or from a Python program which imports this as a module. Read the doc string for more info, and also some caveats (some cases aren't handled right).
1 parent 1fef181 commit f81e5b9

1 file changed

Lines changed: 186 additions & 0 deletions

File tree

Lib/reconvert.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
#! /usr/bin/env python1.5
2+
3+
"""Convert old ("regex") regular expressions to new syntax ("re").
4+
5+
When imported as a module, there are two functions, with their own
6+
doc strings:
7+
8+
convert(s, syntax=None) -- convert a regex regular expression to re syntax
9+
10+
quote(s) -- return a quoted string literal
11+
12+
When used as a script, read a Python string literal (or any other
13+
expression evaluating to a string) from stdin, and write the
14+
translated expression to stdout as a string literal. Unless stdout is
15+
a tty, no trailing \n is written to stdout. This is done so that it
16+
can be used with Emacs C-U M-| (shell-command-on-region with argument
17+
which filters the region through the shell command).
18+
19+
No attempt has been made at coding for performance.
20+
21+
Translation table...
22+
23+
\( ( (unless RE_NO_BK_PARENS set)
24+
\) ) (unless RE_NO_BK_PARENS set)
25+
\| | (unless RE_NO_BK_VBAR set)
26+
\< \b (not quite the same, but alla...)
27+
\> \b (not quite the same, but alla...)
28+
\` \A
29+
\' \Z
30+
31+
Not translated...
32+
33+
.
34+
^
35+
$
36+
*
37+
+ (unless RE_BK_PLUS_QM set, then to \+)
38+
? (unless RE_BK_PLUS_QM set, then to \?)
39+
\
40+
\b
41+
\B
42+
\w
43+
\W
44+
\1 ... \9
45+
46+
Special cases...
47+
48+
Non-printable characters are always replaced by their 3-digit
49+
escape code (except \t, \n, \r, which use mnemonic escapes)
50+
51+
Newline is turned into | when RE_NEWLINE_OR is set
52+
53+
XXX To be done...
54+
55+
[...] (different treatment of backslashed items?)
56+
[^...] (different treatment of backslashed items?)
57+
^ $ * + ? (in some error contexts these are probably treated differently)
58+
\vDD \DD (in the regex docs but only works when RE_ANSI_HEX set)
59+
60+
"""
61+
62+
63+
import regex
64+
from regex_syntax import * # RE_*
65+
66+
# Default translation table: maps one input token (a single character or
# a backslash followed by one character) to the text emitted in its place.
# Tokens that are not listed here are copied to the output unchanged.
mastertable = {
    # Word-boundary and buffer-anchor escapes.
    '\\<': '\\b',
    '\\>': '\\b',
    '\\`': '\\A',
    "\\'": '\\Z',
    # Backslashed grouping/alternation operators lose their backslash...
    '\\(': '(',
    '\\)': ')',
    '\\|': '|',
    # ...while the bare characters gain one.
    '(': '\\(',
    ')': '\\)',
    '|': '\\|',
    # Tab, newline and carriage return become their mnemonic escapes.
    '\t': '\\t',
    '\n': '\\n',
    '\r': '\\r',
}
82+
83+
84+
def convert(s, syntax=None):
    """Translate a regex-style pattern into re-style syntax.

    s is the pattern as a real string object, exactly as it would be
    handed to regex.compile(): any string quotes are already gone and
    the usual escape processing has already happened (for instance via
    eval()).

    syntax optionally selects the regex syntax variant.  It is an
    integer bit mask of the kind accepted by regex.set_syntax(), built
    from the RE_* flags in regex_syntax.  If it is omitted or None, the
    mask reported by regex.get_syntax() is used.

    The result is a pattern string suitable for re.compile().  No
    quotes are added to it; use quote() or repr() for that.

    The translation is token by token and is not guaranteed to be
    correct in every case; borderline constructs such as 'x*?' are not
    handled properly.  A pattern that ends in a lone backslash raises
    IndexError.

    """
    if syntax is None:
        syntax = regex.get_syntax()

    # Adjust a private copy so the module-level defaults are never touched.
    table = mastertable.copy()
    if syntax & RE_NO_BK_PARENS:
        # Drop every paren entry: both spellings then pass through as-is.
        for token in (r'\(', r'\)', '(', ')'):
            del table[token]
    if syntax & RE_NO_BK_VBAR:
        # Likewise for the alternation bar.
        for token in (r'\|', '|'):
            del table[token]
    if syntax & RE_BK_PLUS_QM:
        # Swap the bare and backslashed spellings of + and ?.
        for op in ('+', '?'):
            table[op] = '\\' + op
            table['\\' + op] = op
    if syntax & RE_NEWLINE_OR:
        table['\n'] = '|'

    converted = ""
    pos = 0
    length = len(s)
    while pos < length:
        token = s[pos]
        pos = pos + 1
        if token == '\\':
            # A backslash and the character after it form one token;
            # indexing past the end here is what raises IndexError for
            # a trailing backslash.
            token = token + s[pos]
            pos = pos + 1
        # Unknown tokens map to themselves.
        converted = converted + table.get(token, token)
    return converted
142+
143+
144+
def quote(s, quote=None):
    """Convert a string object to a quoted string literal.

    This is similar to repr() but returns a "raw" string (r'...' or
    r"...") when the result contains backslashes, instead of doubling
    all backslashes.  The resulting literal does *not* always evaluate
    to the same string as the original; however it will do just the
    right thing when passed into re.compile().

    s is the string to quote.

    quote optionally forces the string quote; it must be a single
    character which is a valid Python string quote (' or ").  When it
    is omitted or None, single quotes are used unless s contains a
    single quote and no double quote, in which case double quotes are
    used so that nothing needs escaping.

    Returns the literal as a string, including the surrounding quotes
    and, when needed, the leading r.

    """
    if quote is None:
        q = "'"
        # The alternative must be the *other* quote character; with both
        # set to "'" the switch below could never take effect.
        altq = '"'
        if q in s and altq not in s:
            q = altq
    else:
        assert quote in ('"', "'")
        q = quote
    res = q
    for c in s:
        if c == q:
            # Escape occurrences of the chosen delimiter.
            c = '\\' + c
        elif c < ' ' or c > '~':
            # Anything outside printable ASCII becomes an octal escape,
            # zero-padded to at least three digits.
            c = "\\%03o" % ord(c)
        res = res + c
    res = res + q
    if '\\' in res:
        # Emit a raw literal so the backslashes reach re.compile() intact.
        res = 'r' + res
    return res
174+
175+
176+
def main():
    """Script entry point: filter a string expression from stdin to stdout.

    Reads all of stdin, evaluates it as a Python expression, converts
    the resulting pattern with convert() and writes it back as a string
    literal produced by quote().  A trailing newline is written only
    when stdout is a terminal.

    """
    import sys
    # NOTE(review): eval() executes whatever expression arrives on stdin,
    # so this filter must only be fed trusted text.
    pattern = eval(sys.stdin.read())
    literal = quote(convert(pattern))
    sys.stdout.write(literal)
    if sys.stdout.isatty():
        # Finish the line for interactive use; piped output stays bare.
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)