Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 026af2a

Browse files
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
2 parents f55697c + def0a4c commit 026af2a

3 files changed

Lines changed: 64 additions & 7 deletions

File tree

Lib/lib2to3/fixes/fix_unicode.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,43 @@
1-
"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
1+
r"""Fixer for unicode.
2+
3+
* Changes unicode to str and unichr to chr.
4+
5+
* If "...\u..." is not a unicode literal, change it into "...\\u...".
6+
7+
* Change u"..." into "...".
28
39
"""
410

5-
import re
611
from ..pgen2 import token
712
from .. import fixer_base
813

914
_mapping = {"unichr" : "chr", "unicode" : "str"}
10-
_literal_re = re.compile(r"[uU][rR]?[\'\"]")
1115

1216
class FixUnicode(fixer_base.BaseFix):
    """Fixer that renames unicode/unichr and rewrites string literals.

    NAME nodes matched by PATTERN are renamed through ``_mapping``.
    STRING nodes lose a leading ``u``/``U`` prefix, and unprefixed,
    non-raw literals get the backslash in front of ``u``/``U`` doubled,
    unless the module uses ``from __future__ import unicode_literals``.
    """

    BM_compatible = True
    PATTERN = "STRING | 'unicode' | 'unichr'"

    def start_tree(self, tree, filename):
        """Remember whether ``unicode_literals`` is among the tree's future features."""
        super(FixUnicode, self).start_tree(tree, filename)
        self.unicode_literals = 'unicode_literals' in tree.future_features

    def transform(self, node, results):
        """Return the replacement for a matched NAME or STRING node.

        For a NAME node a clone carrying the mapped name is returned.
        For a STRING node a clone with the rewritten literal text is
        returned, or *node* itself when the text needs no change.
        """
        if node.type == token.NAME:
            new = node.clone()
            new.value = _mapping[node.value]
            return new
        elif node.type == token.STRING:
            val = node.value
            # Only unprefixed, non-raw literals are escaped.  A raw native
            # literal keeps a backslash followed by u/U as literal text in
            # both Python 2 and Python 3, so doubling the backslash there
            # would change the value of the string.
            if (not self.unicode_literals and val[0] in '\'"' and
                    '\\' in val):
                # Split on already-escaped backslash pairs so that only a
                # lone backslash directly before u/U gets doubled.
                val = r'\\'.join([
                    v.replace('\\u', r'\\u').replace('\\U', r'\\U')
                    for v in val.split(r'\\')
                ])
            if val[0] in 'uU':
                # Drop the unicode prefix: u"..." becomes "...".
                val = val[1:]
            if val == node.value:
                return node
            new = node.clone()
            new.value = val
            return new

Lib/lib2to3/tests/test_fixers.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2883,6 +2883,43 @@ def test_unicode_literal_3(self):
28832883
a = """R'''x''' """
28842884
self.check(b, a)
28852885

2886+
def test_native_literal_escape_u(self):
    # Plain native literal: every lone backslash in front of u/U is
    # doubled so that Python 3 does not read it as a unicode escape;
    # backslash pairs that are already escaped stay as they are.
    b = r"""'\\\u20ac\U0001d121\\u20ac'"""
    a = r"""'\\\\u20ac\\U0001d121\\u20ac'"""
    self.check(b, a)

    # Raw native literal: a raw str never treats backslash-u as an
    # escape, in Python 2 or in Python 3, so the expected text is the
    # same as the input.
    b = r"""r'\\\u20ac\U0001d121\\u20ac'"""
    a = r"""r'\\\u20ac\U0001d121\\u20ac'"""
    self.check(b, a)
2894+
2895+
def test_bytes_literal_escape_u(self):
    # Bytes literals, plain and raw: the expected text equals the input.
    literals = (
        r"""b'\\\u20ac\U0001d121\\u20ac'""",
        r"""br'\\\u20ac\U0001d121\\u20ac'""",
    )
    for literal in literals:
        self.check(literal, literal)
2903+
2904+
def test_unicode_literal_escape_u(self):
    # u-prefixed literals: only the leading "u" is expected to disappear;
    # the backslash sequences are expected to stay untouched.
    cases = (
        (r"""u'\\\u20ac\U0001d121\\u20ac'""",
         r"""'\\\u20ac\U0001d121\\u20ac'"""),
        (r"""ur'\\\u20ac\U0001d121\\u20ac'""",
         r"""r'\\\u20ac\U0001d121\\u20ac'"""),
    )
    for before, after in cases:
        self.check(before, after)
2912+
2913+
def test_native_unicode_literal_escape_u(self):
    # With unicode_literals imported, the expected text equals the input
    # for both the plain and the raw unprefixed literal.
    future_import = 'from __future__ import unicode_literals\n'
    literals = (
        r"""'\\\u20ac\U0001d121\\u20ac'""",
        r"""r'\\\u20ac\U0001d121\\u20ac'""",
    )
    for literal in literals:
        source = future_import + literal
        self.check(source, source)
2922+
28862923
class Test_callable(FixerTestCase):
28872924
fixer = "callable"
28882925

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ Core and Builtins
2020
Library
2121
-------
2222

23+
- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
24+
2325
- Issue #17839: base64.decodebytes and base64.encodebytes now accept any
2426
object that exports a 1 dimensional array of bytes (this means the same
2527
is now also true for base64_codec)

0 commit comments

Comments
 (0)