Codestin Search App

200 lines (175 loc) · 6.44 KB
"""Token-related utilities"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from __future__ import annotations
import itertools
import tokenize
from io import StringIO
from keyword import iskeyword
from tokenize import TokenInfo
from typing import Callable, NamedTuple
from collections.abc import Generator
class Token(NamedTuple):
    """A single lexical token, field-compatible with ``tokenize.TokenInfo``.

    Instances are built in this module by unpacking a ``TokenInfo`` tuple
    positionally (``Token(*token_info)``), so the field order must match.
    """

    # Numeric token type, compared against constants such as ``tokenize.NAME``.
    token: int
    # Source text of the token.
    text: str
    # (row, column) position where the token starts; rows start at 1.
    start: tuple[int, int]
    # (row, column) position where the token ends.
    end: tuple[int, int]
    # Source line(s) on which the token was found.
    line: str
def generate_tokens(readline: Callable) -> Generator[TokenInfo, None, None]:
    """Wrap ``tokenize.generate_tokens`` so that ``TokenError`` ends iteration.

    Parameters
    ----------
    readline : callable
        Callable returning one line of source per call, as expected by
        ``tokenize.generate_tokens``.

    Yields
    ------
    TokenInfo
        Every token produced before the tokenizer failed, if it failed.
    """
    try:
        yield from tokenize.generate_tokens(readline)
    except tokenize.TokenError:
        # Incomplete input (e.g. an unclosed bracket at EOF) is expected here:
        # stop quietly and let the caller work with the tokens seen so far.
        return
def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: list[str] | None = None
):
    """Tokenize like ``tokenize.generate_tokens``, softening selected errors.

    When the tokenizer raises ``tokenize.TokenError`` and the error message
    contains one of the known substrings, a final empty ``ERRORTOKEN`` is
    yielded instead of propagating the exception.

    Parameters
    ----------
    readline : callable
        Callable returning one line of source per call.
    extra_errors_to_catch : list of str, optional
        Additional message substrings to treat as recoverable, on top of the
        built-in defaults.

    Yields
    ------
    TokenInfo
        The tokens produced by the tokenizer, followed by one ``ERRORTOKEN``
        with empty text if a recoverable error stopped tokenization.

    Raises
    ------
    tokenize.TokenError
        If the error message matches none of the substrings to catch.
    """
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    # Only the most recent token is needed to position the error token, so
    # remember just that one instead of accumulating the whole stream.
    last_token: TokenInfo | None = None
    try:
        for token in tokenize.generate_tokens(readline):
            last_token = token
            yield token
    except tokenize.TokenError as exc:
        if not any(error in exc.args[0] for error in errors_to_catch):
            # Not one of the recoverable errors: let the caller handle it.
            raise
        if last_token is not None:
            # NOTE(review): this pairs the last token's start row with its
            # end row rather than forming a (row, column) position; kept
            # as-is because callers may rely on it -- confirm the intent.
            start = last_token.start[0], last_token.end[0]
            end = start
            line = last_token.line
        else:
            start = end = (1, 0)
            line = ""
        yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the
        start of the line. The line is ``""`` when the cursor lies beyond
        every line of the cell (including when the cell is empty).
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith("\n"):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        # The loop ran out of lines without reaching the cursor (or there
        # were no lines at all): there is no current line to report.
        line = ""
    return line, offset
def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------
    str
        The (possibly dotted) name selected for the cursor position, or an
        empty string when no name could be found.
    """
    # Names seen so far; attribute access is folded into the previous entry.
    names: list[str] = []
    # Stack of callables whose argument list is currently open.
    call_names: list[str] = []
    # Set only while the current token is the ")" that closed a call.
    closing_call_name: str | None = None
    # Last name seen while no call was open.
    most_recent_outer_name: str | None = None

    offsets = {1: 0}  # lines start at 1
    intersects_with_cursor = False
    cur_token_is_name = False
    tokens: list[Token | None] = [
        Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
    ]
    if not tokens:
        return ""
    # Walk the tokens together with their previous and next neighbours
    # (None at either end of the stream).
    for prev_tok, (tok, next_tok) in zip(
        [None] + tokens, itertools.pairwise(tokens + [None])
    ):
        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        # Only meaningful for the token that closes a call; reset otherwise.
        closing_call_name = None

        offset = offsets[start_line]
        if offset + start_col > cursor_pos:
            # current token starts after the cursor,
            # don't consume it
            break

        if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
            if (
                names
                and prev_tok
                and prev_tok.token == tokenize.OP
                and prev_tok.text == "."
            ):
                # attribute access: extend the previous name into a dotted path
                names[-1] = f"{names[-1]}.{tok.text}"
            else:
                names.append(tok.text)
            if (
                next_tok is not None
                and next_tok.token == tokenize.OP
                and next_tok.text == "="
            ):
                # don't inspect the lhs of an assignment
                names.pop(-1)
                cur_token_is_name = False
            if not call_names:
                most_recent_outer_name = names[-1] if names else None
        elif tok.token == tokenize.OP:
            if tok.text == "(" and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ")" and call_names:
                # keep track of the most recently popped call_name from the stack
                closing_call_name = call_names.pop(-1)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            # if the current token intersects directly, use it instead of the call token
            intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
            break

    if cur_token_is_name and intersects_with_cursor:
        return names[-1]
    # if the cursor isn't directly over a name token, use the most recent
    # call name if we can find one
    elif closing_call_name:
        # if we're on a ")", use the most recently popped call name
        return closing_call_name
    elif call_names:
        # otherwise, look for the most recent call name in the stack
        return call_names[-1]
    elif most_recent_outer_name:
        # if we've popped all the call names, use the most recently-seen
        # outer name
        return most_recent_outer_name
    elif names:
        # failing that, use the most recently seen name
        return names[-1]
    else:
        # give up
        return ""
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

FilesExpand file tree

tokenutil.py

Latest commit

History

tokenutil.py

File metadata and controls