Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 00c7f85

Browse files
committed
Issue #2134: Add support for tokenize.TokenInfo.exact_type.
1 parent 3f67ec1 commit 00c7f85

4 files changed

Lines changed: 187 additions & 3 deletions

File tree

Doc/library/tokenize.rst

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ implemented in Python. The scanner in this module returns comments as tokens
1515
as well, making it useful for implementing "pretty-printers," including
1616
colorizers for on-screen displays.
1717

18+
To simplify token stream handling, all :ref:`operators` and :ref:`delimiters`
19+
tokens are returned using the generic :data:`token.OP` token type. The exact
20+
type can be determined by checking the ``exact_type`` property on the
21+
:term:`named tuple` returned from :func:`tokenize.tokenize`.
22+
1823
Tokenizing Input
1924
----------------
2025

@@ -36,9 +41,17 @@ The primary entry point is a :term:`generator`:
3641
returned as a :term:`named tuple` with the field names:
3742
``type string start end line``.
3843

44+
The returned :term:`named tuple` has an additional property named
45+
``exact_type`` that contains the exact operator type for
46+
:data:`token.OP` tokens. For all other token types ``exact_type``
47+
equals the named tuple ``type`` field.
48+
3949
.. versionchanged:: 3.1
4050
Added support for named tuples.
4151

52+
.. versionchanged:: 3.3
53+
Added support for ``exact_type``.
54+
4255
:func:`tokenize` determines the source encoding of the file by looking for a
4356
UTF-8 BOM or encoding cookie, according to :pep:`263`.
4457

@@ -131,7 +144,19 @@ It is as simple as:
131144

132145
.. code-block:: sh
133146
134-
python -m tokenize [filename.py]
147+
python -m tokenize [-e] [filename.py]
148+
149+
The following options are accepted:
150+
151+
.. program:: tokenize
152+
153+
.. cmdoption:: -h, --help
154+
155+
show this help message and exit
156+
157+
.. cmdoption:: -e, --exact
158+
159+
display token names using the exact type
135160

136161
If :file:`filename.py` is specified its contents are tokenized to stdout.
137162
Otherwise, tokenization is performed on stdin.
@@ -215,3 +240,29 @@ the name of the token, and the final column is the value of the token (if any)
215240
4,10-4,11: OP ')'
216241
4,11-4,12: NEWLINE '\n'
217242
5,0-5,0: ENDMARKER ''
243+
244+
The exact token type names can be displayed using the ``-e`` option:
245+
246+
.. code-block:: sh
247+
248+
$ python -m tokenize -e hello.py
249+
0,0-0,0: ENCODING 'utf-8'
250+
1,0-1,3: NAME 'def'
251+
1,4-1,13: NAME 'say_hello'
252+
1,13-1,14: LPAR '('
253+
1,14-1,15: RPAR ')'
254+
1,15-1,16: COLON ':'
255+
1,16-1,17: NEWLINE '\n'
256+
2,0-2,4: INDENT ' '
257+
2,4-2,9: NAME 'print'
258+
2,9-2,10: LPAR '('
259+
2,10-2,25: STRING '"Hello, World!"'
260+
2,25-2,26: RPAR ')'
261+
2,26-2,27: NEWLINE '\n'
262+
3,0-3,1: NL '\n'
263+
4,0-4,0: DEDENT ''
264+
4,0-4,9: NAME 'say_hello'
265+
4,9-4,10: LPAR '('
266+
4,10-4,11: RPAR ')'
267+
4,11-4,12: NEWLINE '\n'
268+
5,0-5,0: ENDMARKER ''

Lib/test/test_tokenize.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,11 +567,12 @@
567567

568568
from test import support
569569
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
570-
STRING, ENDMARKER, tok_name, detect_encoding,
570+
STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
571571
open as tokenize_open)
572572
from io import BytesIO
573573
from unittest import TestCase
574574
import os, sys, glob
575+
import token
575576

576577
def dump_tokens(s):
577578
"""Print out the tokens in s in a table format.
@@ -922,6 +923,78 @@ def mock_readline():
922923

923924
self.assertTrue(encoding_used, encoding)
924925

926+
def assertExactTypeEqual(self, opstr, *optypes):
927+
tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
928+
num_optypes = len(optypes)
929+
self.assertEqual(len(tokens), 2 + num_optypes)
930+
self.assertEqual(token.tok_name[tokens[0].exact_type],
931+
token.tok_name[ENCODING])
932+
for i in range(num_optypes):
933+
self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
934+
token.tok_name[optypes[i]])
935+
self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
936+
token.tok_name[token.ENDMARKER])
937+
938+
def test_exact_type(self):
939+
self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
940+
self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
941+
self.assertExactTypeEqual(':', token.COLON)
942+
self.assertExactTypeEqual(',', token.COMMA)
943+
self.assertExactTypeEqual(';', token.SEMI)
944+
self.assertExactTypeEqual('+', token.PLUS)
945+
self.assertExactTypeEqual('-', token.MINUS)
946+
self.assertExactTypeEqual('*', token.STAR)
947+
self.assertExactTypeEqual('/', token.SLASH)
948+
self.assertExactTypeEqual('|', token.VBAR)
949+
self.assertExactTypeEqual('&', token.AMPER)
950+
self.assertExactTypeEqual('<', token.LESS)
951+
self.assertExactTypeEqual('>', token.GREATER)
952+
self.assertExactTypeEqual('=', token.EQUAL)
953+
self.assertExactTypeEqual('.', token.DOT)
954+
self.assertExactTypeEqual('%', token.PERCENT)
955+
self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
956+
self.assertExactTypeEqual('==', token.EQEQUAL)
957+
self.assertExactTypeEqual('!=', token.NOTEQUAL)
958+
self.assertExactTypeEqual('<=', token.LESSEQUAL)
959+
self.assertExactTypeEqual('>=', token.GREATEREQUAL)
960+
self.assertExactTypeEqual('~', token.TILDE)
961+
self.assertExactTypeEqual('^', token.CIRCUMFLEX)
962+
self.assertExactTypeEqual('<<', token.LEFTSHIFT)
963+
self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
964+
self.assertExactTypeEqual('**', token.DOUBLESTAR)
965+
self.assertExactTypeEqual('+=', token.PLUSEQUAL)
966+
self.assertExactTypeEqual('-=', token.MINEQUAL)
967+
self.assertExactTypeEqual('*=', token.STAREQUAL)
968+
self.assertExactTypeEqual('/=', token.SLASHEQUAL)
969+
self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
970+
self.assertExactTypeEqual('&=', token.AMPEREQUAL)
971+
self.assertExactTypeEqual('|=', token.VBAREQUAL)
972+
self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
973+
self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
974+
self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
975+
self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
976+
self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
977+
self.assertExactTypeEqual('//', token.DOUBLESLASH)
978+
self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
979+
self.assertExactTypeEqual('@', token.AT)
980+
981+
self.assertExactTypeEqual('a**2+b**2==c**2',
982+
NAME, token.DOUBLESTAR, NUMBER,
983+
token.PLUS,
984+
NAME, token.DOUBLESTAR, NUMBER,
985+
token.EQEQUAL,
986+
NAME, token.DOUBLESTAR, NUMBER)
987+
self.assertExactTypeEqual('{1, 2, 3}',
988+
token.LBRACE,
989+
token.NUMBER, token.COMMA,
990+
token.NUMBER, token.COMMA,
991+
token.NUMBER,
992+
token.RBRACE)
993+
self.assertExactTypeEqual('^(x & 0x1)',
994+
token.CIRCUMFLEX,
995+
token.LPAR,
996+
token.NAME, token.AMPER, token.NUMBER,
997+
token.RPAR)
925998

926999
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
9271000

Lib/tokenize.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,65 @@
4545
ENCODING = N_TOKENS + 2
4646
tok_name[ENCODING] = 'ENCODING'
4747
N_TOKENS += 3
48+
EXACT_TOKEN_TYPES = {
49+
'(': LPAR,
50+
')': RPAR,
51+
'[': LSQB,
52+
']': RSQB,
53+
':': COLON,
54+
',': COMMA,
55+
';': SEMI,
56+
'+': PLUS,
57+
'-': MINUS,
58+
'*': STAR,
59+
'/': SLASH,
60+
'|': VBAR,
61+
'&': AMPER,
62+
'<': LESS,
63+
'>': GREATER,
64+
'=': EQUAL,
65+
'.': DOT,
66+
'%': PERCENT,
67+
'{': LBRACE,
68+
'}': RBRACE,
69+
'==': EQEQUAL,
70+
'!=': NOTEQUAL,
71+
'<=': LESSEQUAL,
72+
'>=': GREATEREQUAL,
73+
'~': TILDE,
74+
'^': CIRCUMFLEX,
75+
'<<': LEFTSHIFT,
76+
'>>': RIGHTSHIFT,
77+
'**': DOUBLESTAR,
78+
'+=': PLUSEQUAL,
79+
'-=': MINEQUAL,
80+
'*=': STAREQUAL,
81+
'/=': SLASHEQUAL,
82+
'%=': PERCENTEQUAL,
83+
'&=': AMPEREQUAL,
84+
'|=': VBAREQUAL,
85+
'^=': CIRCUMFLEXEQUAL,
86+
'<<=': LEFTSHIFTEQUAL,
87+
'>>=': RIGHTSHIFTEQUAL,
88+
'**=': DOUBLESTAREQUAL,
89+
'//': DOUBLESLASH,
90+
'//=': DOUBLESLASHEQUAL,
91+
'@': AT
92+
}
4893

4994
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
5095
def __repr__(self):
5196
annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
5297
return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
5398
self._replace(type=annotated_type))
5499

100+
@property
101+
def exact_type(self):
102+
if self.type == OP and self.string in EXACT_TOKEN_TYPES:
103+
return EXACT_TOKEN_TYPES[self.string]
104+
else:
105+
return self.type
106+
55107
def group(*choices): return '(' + '|'.join(choices) + ')'
56108
def any(*choices): return group(*choices) + '*'
57109
def maybe(*choices): return group(*choices) + '?'
@@ -549,6 +601,8 @@ def error(message, filename=None, location=None):
549601
parser.add_argument(dest='filename', nargs='?',
550602
metavar='filename.py',
551603
help='the file to tokenize; defaults to stdin')
604+
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
605+
help='display token names using the exact type')
552606
args = parser.parse_args()
553607

554608
try:
@@ -563,9 +617,12 @@ def error(message, filename=None, location=None):
563617

564618
# Output the tokenization
565619
for token in tokens:
620+
token_type = token.type
621+
if args.exact:
622+
token_type = token.exact_type
566623
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
567624
print("%-20s%-15s%-15r" %
568-
(token_range, tok_name[token.type], token.string))
625+
(token_range, tok_name[token_type], token.string))
569626
except IndentationError as err:
570627
line, column = err.args[1][1:3]
571628
error(err.args[0], filename, (line, column))

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,9 @@ Core and Builtins
450450
Library
451451
-------
452452

453+
- Issue #2134: A new attribute that specifies the exact type of token.OP
454+
tokens has been added to tokenize.TokenInfo.
455+
453456
- Issue #13722: Avoid silencing ImportErrors when initializing the codecs
454457
registry.
455458

0 commit comments

Comments
 (0)