|
1 | | -"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...". |
| 1 | +r"""Fixer for unicode. |
| 2 | +
|
| 3 | +* Changes unicode to str and unichr to chr. |
| 4 | +
|
| 5 | +* If "...\u..." is not a unicode literal, change it into "...\\u...". |
| 6 | +
|
| 7 | +* Changes u"..." into "...". |
2 | 8 |
|
3 | 9 | """ |
4 | 10 |
|
5 | | -import re |
6 | 11 | from ..pgen2 import token |
7 | 12 | from .. import fixer_base |
8 | 13 |
|
# Replacement name for each builtin name that FixUnicode.transform rewrites.
9 | 14 | _mapping = {"unichr" : "chr", "unicode" : "str"} |
10 | | -_literal_re = re.compile(r"[uU][rR]?[\'\"]") |
11 | 15 |
|
class FixUnicode(fixer_base.BaseFix):
    """Fixer for the ``unicode``/``unichr`` names and for string literals.

    * ``unicode`` becomes ``str`` and ``unichr`` becomes ``chr``.
    * In a module that does not enable ``unicode_literals``, an unprefixed
      (non-raw) string literal has the backslash of every backslash-u /
      backslash-U sequence doubled, so the sequence stays literal text.
    * A leading ``u``/``U`` prefix is dropped from string literals.
    """

    BM_compatible = True
    PATTERN = "STRING | 'unicode' | 'unichr'"

    def start_tree(self, tree, filename):
        """Remember whether ``tree`` lists ``unicode_literals`` as a future feature."""
        super(FixUnicode, self).start_tree(tree, filename)
        self.unicode_literals = 'unicode_literals' in tree.future_features

    def transform(self, node, results):
        """Return the replacement for a matched NAME or STRING node.

        NAME nodes are cloned with the mapped builtin name.  STRING nodes
        are returned unchanged (the same object) when no rewrite is needed,
        otherwise a clone carrying the rewritten literal text is returned.
        Any other node type yields ``None``.
        """
        if node.type == token.NAME:
            new = node.clone()
            new.value = _mapping[node.value]
            return new
        elif node.type == token.STRING:
            val = node.value
            # Only unprefixed literals need their \u / \U protected.  Raw
            # literals (r"..."/R"...") are deliberately excluded: they never
            # interpret \u, so doubling the backslash there would insert an
            # extra backslash and change the string's value.
            if (not self.unicode_literals and val[0] in '\'"' and
                    '\\' in val):
                # Split on escaped backslashes first so that the "u" in
                # "\\u" (an escaped backslash followed by a plain u) is not
                # mistaken for the start of a \u escape.
                val = r'\\'.join([
                    v.replace('\\u', r'\\u').replace('\\U', r'\\U')
                    for v in val.split(r'\\')
                ])
            # Drop the unicode prefix: u"..." -> "...", ur"..." -> r"...".
            if val[0] in 'uU':
                val = val[1:]
            if val == node.value:
                # Nothing changed; hand back the original node untouched.
                return node
            new = node.clone()
            new.value = val
            return new
0 commit comments