Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[3.6] bpo-23894: Make rb'' strings work in lib2to3 (GH-1724) #1730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions Lib/lib2to3/pgen2/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ def maybe(*choices): return group(*choices) + '?'
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
Expand All @@ -95,9 +96,9 @@ def maybe(*choices): return group(*choices) + '?'
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
Expand All @@ -111,6 +112,7 @@ def maybe(*choices): return group(*choices) + '?'
"b'''": single3prog, 'b"""': double3prog,
"ur'''": single3prog, 'ur"""': double3prog,
"br'''": single3prog, 'br"""': double3prog,
"rb'''": single3prog, 'rb"""': double3prog,
"R'''": single3prog, 'R"""': double3prog,
"U'''": single3prog, 'U"""': double3prog,
"B'''": single3prog, 'B"""': double3prog,
Expand All @@ -120,6 +122,9 @@ def maybe(*choices): return group(*choices) + '?'
"bR'''": single3prog, 'bR"""': double3prog,
"Br'''": single3prog, 'Br"""': double3prog,
"BR'''": single3prog, 'BR"""': double3prog,
"rB'''": single3prog, 'rB"""': double3prog,
"Rb'''": single3prog, 'Rb"""': double3prog,
"RB'''": single3prog, 'RB"""': double3prog,
'r': None, 'R': None,
'u': None, 'U': None,
'b': None, 'B': None}
Expand All @@ -132,7 +137,9 @@ def maybe(*choices): return group(*choices) + '?'
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
"uR'''", 'uR"""', "UR'''", 'UR"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""',):
"bR'''", 'bR"""', "BR'''", 'BR"""',
"rb'''", 'rb"""', "Rb'''", 'Rb"""',
"rB'''", 'rB"""', "RB'''", 'RB"""',):
triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
Expand All @@ -142,7 +149,9 @@ def maybe(*choices): return group(*choices) + '?'
"ur'", 'ur"', "Ur'", 'Ur"',
"uR'", 'uR"', "UR'", 'UR"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"', ):
"bR'", 'bR"', "BR'", 'BR"',
"rb'", 'rb"', "Rb'", 'Rb"',
"rB'", 'rB"', "RB'", 'RB"',):
Copy link
Member

@ericvsmith ericvsmith May 22, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_all_string_prefixes in Lib/tokenize.py computes similar prefixes (without ' or "), but generates them programmatically. That's less error-prone than listing these prefixes by hand. Plus, if "f" strings are added, the number of combinations goes up dramatically.

You can't use it directly, because it doesn't support "ur" and variants, because they're not supported in 3.x. But maybe you can use a similar strategy.

This isn't a blocker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this is not elegant. Sadly, the list is not exhaustive as the _litprefix regex shows. So the algorithm would need to be custom anyway. I'd count this as a possible future improvement.

single_quoted[t] = t

tabsize = 8
Expand Down
21 changes: 21 additions & 0 deletions Lib/lib2to3/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def test_5(self):
def test_6(self):
self.validate("lst: List[int] = []")


class TestExcept(GrammarTest):
def test_new(self):
s = """
Expand All @@ -338,6 +339,26 @@ def test_old(self):
self.validate(s)


class TestStringLiterals(GrammarTest):
    """Run ``validate`` on string literals written with each listed prefix.

    Every entry of ``prefixes`` is an optional literal prefix (case
    variants of ``r``, ``u``, ``b``, ``ur``, ``br`` and ``rb``) followed
    by the opening quote character.
    """

    prefixes = (
        # No prefix.
        "'", '"',
        # Single-letter prefixes.
        "r'", 'r"', "R'", 'R"',
        "u'", 'u"', "U'", 'U"',
        "b'", 'b"', "B'", 'B"',
        # Two-letter prefixes, every case combination.
        "ur'", 'ur"', "Ur'", 'Ur"',
        "uR'", 'uR"', "UR'", 'UR"',
        "br'", 'br"', "Br'", 'Br"',
        "bR'", 'bR"', "BR'", 'BR"',
        "rb'", 'rb"', "Rb'", 'Rb"',
        "rB'", 'rB"', "RB'", 'RB"',
    )

    def test_lit(self):
        """Validate one single-quoted and one triple-quoted literal per prefix."""
        for prefix in self.prefixes:
            # The last character of each entry is its quote character.
            quote = prefix[-1]
            # Single-quoted form, e.g. rb'spamspamspam'
            self.validate(prefix + "spamspamspam" + quote)
            # Triple-quoted form, e.g. rb'''eggs'''
            self.validate(prefix + quote * 2 + "eggs" + quote * 3)


# Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms
class TestSetLiteral(GrammarTest):
def test_1(self):
Expand Down