|
18 | 18 | # Distributed under the terms of the Modified BSD License. |
19 | 19 | import ast |
20 | 20 | import codeop |
| 21 | +import io |
21 | 22 | import re |
22 | 23 | import sys |
| 24 | +import tokenize |
23 | 25 | import warnings |
24 | 26 |
|
25 | 27 | from IPython.utils.py3compat import cast_unicode |
@@ -87,6 +89,113 @@ def num_ini_spaces(s): |
87 | 89 | else: |
88 | 90 | return 0 |
89 | 91 |
|
# Fake token types for partial_tokens. They are numbered from
# tokenize.N_TOKENS upwards so they stay distinct from the token types that
# find_next_indent compares them against.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1


# The classes below have the same API as TokenInfo (type, exact_type, string,
# start, end, line), but don't try to look up a token type name that they
# won't find.
class _PartialToken:
    """Behaviour shared by the fake tokens emitted for incomplete code."""

    @property
    def string(self):
        """Token text, under the attribute name that TokenInfo uses."""
        return self.s

    def __repr__(self):
        return '{}(string={!r}, start={!r}, end={!r}, line={!r})'.format(
            type(self).__name__, self.s, self.start, self.end, self.line)


class IncompleteString(_PartialToken):
    """Fake token standing for a string literal that is still open.

    Parameters
    ----------
    s : str
        The text of the unfinished string (also available as ``string``).
    start, end : tuple of int
        ``(row, column)`` positions, as on a TokenInfo.
    line : str
        The source line the token is reported on.
    """
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s = s
        self.start = start
        self.end = end
        self.line = line


class InMultilineStatement(_PartialToken):
    """Fake token marking that the code stops inside a multi-line statement.

    Parameters
    ----------
    pos : tuple of int
        ``(row, column)`` position used for both ``start`` and ``end``.
    line : str
        The source line the token is reported on.
    """
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # The marker has no text of its own and covers a zero-width span.
        self.s = ''
        self.start = self.end = pos
        self.line = line
| 112 | + |
def partial_tokens(s):
    """Iterate over tokens from a possibly-incomplete string of code.

    Ordinary tokens come straight from ``tokenize.generate_tokens``. If the
    tokenizer stops with a ``TokenError`` because the input ends too early,
    one extra fake token is yielded: INCOMPLETE_STRING when the error message
    mentions a multi-line string, IN_MULTILINE_STATEMENT when it mentions a
    multi-line statement. These can only occur as the last token yielded, and
    represent the two main ways for code to be incomplete. Any other
    ``TokenError`` is re-raised unchanged.
    """
    # Placeholder so that ``latest.end`` is defined even when the tokenizer
    # fails before producing a single real token.
    latest = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for latest in tokenize.generate_tokens(io.StringIO(s).readline):
            yield latest
    except tokenize.TokenError as err:
        # The tokenizer ran off the end of the input.
        src_lines = s.splitlines(keepends=True)
        final_line = src_lines[-1]
        eof_pos = (len(src_lines), len(final_line))
        message = err.args[0]
        if 'multi-line string' in message:
            # Everything after the last good token belongs to the open string.
            row, col = string_start = latest.end
            remainder = src_lines[row - 1][col:] + ''.join(src_lines[row:])
            yield IncompleteString(remainder, string_start, eof_pos, final_line)
        elif 'multi-line statement' in message:
            yield InMultilineStatement(eof_pos, final_line)
        else:
            raise
| 137 | + |
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : str
        The source typed so far; it may stop in the middle of a statement or
        of a string.

    Returns
    -------
    int
        Suggested indentation for the line that follows ``code``:

        * 0 when there is no code yet, or the code stops inside a string;
        * the current level plus 4 after a line ending in ``:`` or right
          after an opening bracket;
        * the enclosing level after ``raise``, ``return``, ``pass``,
          ``break`` or ``continue`` in an indented block;
        * otherwise the indentation of the last line of code.
    """
    tokens = list(partial_tokens(code))

    # Drop trailing tokens that carry no statement content. NL is included so
    # that a trailing comment-only or blank line does not hide the statement
    # before it (e.g. 'if x:' followed by an indented comment still indents).
    trailing_noise = {tokenize.ENDMARKER, tokenize.DEDENT, tokenize.NEWLINE,
                      tokenize.NL, tokenize.COMMENT}
    while tokens and tokens[-1].type in trailing_noise:
        tokens.pop()
    if not tokens:
        # Nothing but blank lines and comments so far.
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # After a non-logical line break, the column where the next token
            # starts is the indentation in use on the continuation line.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        # The marker can be the only token (code that is nothing but a line
        # continuation), so make sure there is a token before it.
        if len(tokens) >= 2 and tokens[-2].exact_type in {
                tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
| 197 | + |
| 198 | + |
90 | 199 | def last_blank(src): |
91 | 200 | """Determine if the input source ends in a blank. |
92 | 201 |
|
@@ -306,7 +415,7 @@ def push(self, lines): |
306 | 415 | if source.endswith('\\\n'): |
307 | 416 | return False |
308 | 417 |
|
309 | | - self._update_indent(lines) |
| 418 | + self._update_indent() |
310 | 419 | try: |
311 | 420 | with warnings.catch_warnings(): |
312 | 421 | warnings.simplefilter('error', SyntaxWarning) |
@@ -382,55 +491,10 @@ def push_accepts_more(self): |
382 | 491 | # General fallback - accept more code |
383 | 492 | return True |
384 | 493 |
|
385 | | - #------------------------------------------------------------------------ |
386 | | - # Private interface |
387 | | - #------------------------------------------------------------------------ |
388 | | - |
389 | | - def _find_indent(self, line): |
390 | | - """Compute the new indentation level for a single line. |
391 | | -
|
392 | | - Parameters |
393 | | - ---------- |
394 | | - line : str |
395 | | - A single new line of non-whitespace, non-comment Python input. |
396 | | -
|
397 | | - Returns |
398 | | - ------- |
399 | | - indent_spaces : int |
400 | | - New value for the indent level (it may be equal to self.indent_spaces |
401 | | - if indentation doesn't change. |
402 | | -
|
403 | | - full_dedent : boolean |
404 | | - Whether the new line causes a full flush-left dedent. |
405 | | - """ |
406 | | - indent_spaces = self.indent_spaces |
407 | | - full_dedent = self._full_dedent |
408 | | - |
409 | | - inisp = num_ini_spaces(line) |
410 | | - if inisp < indent_spaces: |
411 | | - indent_spaces = inisp |
412 | | - if indent_spaces <= 0: |
413 | | - #print 'Full dedent in text',self.source # dbg |
414 | | - full_dedent = True |
415 | | - |
416 | | - if line.rstrip()[-1] == ':': |
417 | | - indent_spaces += 4 |
418 | | - elif dedent_re.match(line): |
419 | | - indent_spaces -= 4 |
420 | | - if indent_spaces <= 0: |
421 | | - full_dedent = True |
422 | | - |
423 | | - # Safety |
424 | | - if indent_spaces < 0: |
425 | | - indent_spaces = 0 |
426 | | - #print 'safety' # dbg |
427 | | - |
428 | | - return indent_spaces, full_dedent |
429 | | - |
430 | | - def _update_indent(self, lines): |
431 | | - for line in remove_comments(lines).splitlines(): |
432 | | - if line and not line.isspace(): |
433 | | - self.indent_spaces, self._full_dedent = self._find_indent(line) |
| 494 | + def _update_indent(self): |
| 495 | + # self.source always has a trailing newline |
| 496 | + self.indent_spaces = find_next_indent(self.source[:-1]) |
| 497 | + self._full_dedent = (self.indent_spaces == 0) |
434 | 498 |
|
435 | 499 | def _store(self, lines, buffer=None, store='source'): |
436 | 500 | """Store one or more lines of input. |
|
0 commit comments