Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ee6d0e6

Browse files
authored
Merge pull request #10270 from takluyver/tokenize-indent
Calculate indentation based on tokens, not regexes
2 parents 4007bef + 7b726ac commit ee6d0e6

2 files changed

Lines changed: 141 additions & 50 deletions

File tree

IPython/core/inputsplitter.py

Lines changed: 114 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
# Distributed under the terms of the Modified BSD License.
1919
import ast
2020
import codeop
21+
import io
2122
import re
2223
import sys
24+
import tokenize
2325
import warnings
2426

2527
from IPython.utils.py3compat import cast_unicode
@@ -87,6 +89,113 @@ def num_ini_spaces(s):
8789
else:
8890
return 0
8991

92+
# Pseudo token types emitted by partial_tokens. They are numbered from
# tokenize.N_TOKENS upwards.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = INCOMPLETE_STRING + 1
95+
96+
# The 2 classes below have the same API as TokenInfo, but don't try to look up
97+
# a token type name that they won't find.
98+
class IncompleteString:
    """Pseudo token marking a string that is still open at the end of input.

    Carries the same positional attributes as a tokenize.TokenInfo
    (``start``, ``end``, ``line``) plus ``type``/``exact_type``, both fixed
    to INCOMPLETE_STRING. The text of the unfinished string is kept in ``s``.
    """
    type = INCOMPLETE_STRING
    exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        # s: text of the unfinished string; start/end: (row, col) pairs;
        # line: the source line the token ends on.
        self.s, self.start = s, start
        self.end, self.line = end, line
105+
106+
class InMultilineStatement:
    """Pseudo token marking input that ends inside a multi-line statement.

    The token is zero-width: ``start`` and ``end`` are both ``pos`` and its
    text ``s`` is empty. ``type`` and ``exact_type`` are both fixed to
    IN_MULTILINE_STATEMENT.
    """
    type = IN_MULTILINE_STATEMENT
    exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        self.s = ''
        self.start = pos
        self.end = pos
        self.line = line
112+
113+
def partial_tokens(s):
    """Yield tokens from source code that may stop part-way through.

    Tokens come from tokenize.generate_tokens. If the tokenizer raises
    TokenError, one extra pseudo token is yielded as the final item:

    * an IncompleteString when the error message mentions
      'multi-line string';
    * an InMultilineStatement when it mentions 'multi-line statement'.

    Any other TokenError is re-raised unchanged.

    Parameters
    ----------
    s : str
        The (possibly incomplete) source code to tokenize.
    """
    source_reader = io.StringIO(s)
    # Placeholder so that `last_token.end` is defined even when tokenizing
    # fails before a single real token has been produced.
    last_token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for last_token in tokenize.generate_tokens(source_reader.readline):
            yield last_token
    except tokenize.TokenError as err:
        # The tokenizer hit end-of-input in the middle of something.
        src_lines = s.splitlines(keepends=True)
        final_line = src_lines[-1]
        eof_pos = (len(src_lines), len(final_line))
        message = err.args[0]
        if 'multi-line string' in message:
            # The unfinished string starts where the last good token ended.
            start = last_token.end
            row, col = start
            unfinished = ''.join([src_lines[row - 1][col:]] + src_lines[row:])
            yield IncompleteString(unfinished, start, eof_pos, final_line)
        elif 'multi-line statement' in message:
            yield InMultilineStatement(eof_pos, final_line)
        else:
            raise
137+
138+
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : str
        The source entered so far; it may be incomplete.

    Returns
    -------
    int
        The suggested indentation for the next line: 0 inside an unfinished
        multi-line string; the current level plus 4 after a trailing colon
        or a just-opened bracket; an earlier, smaller level after a line
        starting with raise/return/pass/break/continue; otherwise the
        current level.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Trailing dedents, newlines and comments are dropped so the checks below
    # see the last meaningful token. The emptiness guard stops the loop from
    # indexing an exhausted list.
    trailing_types = {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}
    while tokens and tokens[-1].type in trailing_types:
        tokens.pop()
    if not tokens:
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        if n != prev_indents[-1]:
            prev_indents.append(n)

    block_types = {tokenize.INDENT, tokenize.DEDENT}
    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in block_types:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # The token after a non-logical newline shows where the
            # continuation line starts.
            following = next(tokiter, None)
            if following is None:
                break
            _add_indent(following.start[1])

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        openers = {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}
        # The marker can be the only token (e.g. input that is just a
        # backslash continuation), so check the length before looking at
        # the token in front of it.
        if len(tokens) > 1 and tokens[-2].exact_type in openers:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
197+
198+
90199
def last_blank(src):
91200
"""Determine if the input source ends in a blank.
92201
@@ -306,7 +415,7 @@ def push(self, lines):
306415
if source.endswith('\\\n'):
307416
return False
308417

309-
self._update_indent(lines)
418+
self._update_indent()
310419
try:
311420
with warnings.catch_warnings():
312421
warnings.simplefilter('error', SyntaxWarning)
@@ -382,55 +491,10 @@ def push_accepts_more(self):
382491
# General fallback - accept more code
383492
return True
384493

385-
#------------------------------------------------------------------------
386-
# Private interface
387-
#------------------------------------------------------------------------
388-
389-
def _find_indent(self, line):
    """Compute the new indentation level for a single line.

    Parameters
    ----------
    line : str
        A single new line of non-whitespace, non-comment Python input.

    Returns
    -------
    indent_spaces : int
        New value for the indent level (it may be equal to
        self.indent_spaces if indentation doesn't change). Never negative.

    full_dedent : boolean
        Whether the new line causes a full flush-left dedent.
    """
    indent_spaces = self.indent_spaces
    full_dedent = self._full_dedent

    # A line that starts to the left of the current level dedents to there.
    inisp = num_ini_spaces(line)
    if inisp < indent_spaces:
        indent_spaces = inisp
        if indent_spaces <= 0:
            full_dedent = True

    # endswith() rather than indexing [-1], so a blank line cannot raise
    # IndexError.
    if line.rstrip().endswith(':'):
        indent_spaces += 4
    elif dedent_re.match(line):
        indent_spaces -= 4
        if indent_spaces <= 0:
            full_dedent = True

    # Safety: never report a negative indentation.
    return max(indent_spaces, 0), full_dedent
429-
430-
def _update_indent(self, lines):
    """Update the indentation state from each non-blank line of ``lines``.

    Comments are removed with remove_comments first. Each remaining line
    that is neither empty nor pure whitespace is passed to _find_indent, and
    the result is stored in self.indent_spaces and self._full_dedent.
    """
    uncommented = remove_comments(lines)
    for text in uncommented.splitlines():
        if not text or text.isspace():
            continue
        self.indent_spaces, self._full_dedent = self._find_indent(text)
494+
def _update_indent(self):
    """Recompute the indentation state from the source stored so far.

    Sets self.indent_spaces to the result of find_next_indent and sets
    self._full_dedent to whether that result is zero.
    """
    # self.source always has a trailing newline; drop it before tokenizing.
    source_without_newline = self.source[:-1]
    self.indent_spaces = find_next_indent(source_without_newline)
    self._full_dedent = (self.indent_spaces == 0)
434498

435499
def _store(self, lines, buffer=None, store='source'):
436500
"""Store one or more lines of input.

IPython/core/tests/test_inputsplitter.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,3 +612,30 @@ def test_incremental(self):
612612
sp.push('\n')
613613
# In this case, a blank line should end the cell magic
614614
nt.assert_false(sp.push_accepts_more()) #2
615+
616+
# (code, expected next-line indent) pairs for find_next_indent.
# The whitespace inside the multi-line samples is significant: the expected
# values depend on the exact number of leading spaces on each inner line.
indentation_samples = [
    ('a = 1', 0),
    ('for a in b:', 4),
    ('def f():', 4),
    ('def f(): #comment', 4),
    ('a = ":#not a comment"', 0),
    ('def f():\n    a = 1', 4),
    ('def f():\n    return 1', 0),
    # No newline after the 'if' line: 'continue' sits on the same line, so
    # the 3-space level of that line is kept.
    ('for a in b:\n'
     '   if a < 0:'
     '       continue', 3),
    ('a = {', 4),
    ('a = {\n'
     '     1,', 5),
    ('b = """123', 0),
    ('', 0),
    ('def f():\n    pass', 0),
    ('class Bar:\n    def f():\n        pass', 4),
    ('class Bar:\n    def f():\n        raise', 4),
]
636+
637+
def test_find_next_indent():
    """Check isp.find_next_indent against every entry of indentation_samples."""
    for source, expected in indentation_samples:
        actual = isp.find_next_indent(source)
        assert actual == expected, (
            "{!r} != {!r} (expected)\n Code: {!r}".format(actual, expected, source)
        )

0 commit comments

Comments
 (0)