From e921a809b2fcb802742eb58d704577ab55c92cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 18 Mar 2025 17:32:05 +0100 Subject: [PATCH 01/22] gh-131507: Add support for syntax highlighting in PyREPL --- Lib/_pyrepl/reader.py | 23 ++++-- Lib/_pyrepl/utils.py | 123 +++++++++++++++++++++++++++- Lib/test/test_pyrepl/test_reader.py | 68 ++++++++++++++- 3 files changed, 199 insertions(+), 15 deletions(-) diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 7fc2422dac9c3f..b168c0aa427869 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -27,9 +27,8 @@ from dataclasses import dataclass, field, fields from _colorize import can_colorize, ANSIColors - from . import commands, console, input -from .utils import wlen, unbracket, disp_str +from .utils import wlen, unbracket, disp_str, gen_colors from .trace import trace @@ -38,8 +37,7 @@ from .types import Callback, SimpleContextManager, KeySpec, CommandName -# syntax classes: - +# syntax classes SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) @@ -144,16 +142,17 @@ class Reader: Instance variables of note include: * buffer: - A *list* (*not* a string at the moment :-) containing all the - characters that have been entered. + A per-character list containing all the characters that have been + entered. Does not include color information. * console: Hopefully encapsulates the OS dependent stuff. * pos: A 0-based index into 'buffer' for where the insertion point is. * screeninfo: - Ahem. This list contains some info needed to move the - insertion point around reasonably efficiently. + A list of screen position tuples. Each list element is a tuple + representing information on visible line length for a given line. + Allows for efficient skipping of color escape sequences. * cxy, lxy: the position of the insertion point in screen ... 
* syntax_table: @@ -316,6 +315,11 @@ def calc_screen(self) -> list[str]: pos -= offset prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + if self.can_colorize: + colors = list(gen_colors(self.get_unicode())) + else: + colors = None lines = "".join(self.buffer[offset:]).split("\n") cursor_found = False lines_beyond_cursor = 0 @@ -343,7 +347,7 @@ def calc_screen(self) -> list[str]: screeninfo.append((0, [])) pos -= line_len + 1 prompt, prompt_len = self.process_prompt(prompt) - chars, char_widths = disp_str(line) + chars, char_widths = disp_str(line, colors, offset) wrapcount = (sum(char_widths) + prompt_len) // self.console.width trace("wrapcount = {wrapcount}", wrapcount=wrapcount) if wrapcount == 0 or not char_widths: @@ -567,6 +571,7 @@ def insert(self, text: str | list[str]) -> None: def update_cursor(self) -> None: """Move the cursor to reflect changes in self.pos""" self.cxy = self.pos2xy() + trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy) self.console.move_cursor(*self.cxy) def after_command(self, cmd: Command) -> None: diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 7437fbe1ab9371..8b57c05368ec00 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -2,12 +2,56 @@ import unicodedata import functools +from idlelib import colorizer +from typing import cast, Iterator, Literal, Match, NamedTuple, Pattern, Self +from _colorize import ANSIColors + from .types import CharBuffer, CharWidths from .trace import trace ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +COLORIZE_RE: Pattern[str] = colorizer.prog +IDENTIFIER_RE: Pattern[str] = colorizer.idprog +IDENTIFIERS_AFTER = {"def", "class"} +COLORIZE_GROUP_NAME_MAP: dict[str, str] = colorizer.prog_group_name_to_tag + +type ColorTag = ( + Literal["KEYWORD"] + | Literal["BUILTIN"] + | Literal["COMMENT"] + | Literal["STRING"] + | 
Literal["DEFINITION"] + | Literal["SYNC"] +) + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + +class ColorSpan(NamedTuple): + span: Span + tag: ColorTag + + +TAG_TO_ANSI: dict[ColorTag, str] = { + "KEYWORD": ANSIColors.BOLD_BLUE, + "BUILTIN": ANSIColors.CYAN, + "COMMENT": ANSIColors.RED, + "STRING": ANSIColors.GREEN, + "DEFINITION": ANSIColors.BOLD_WHITE, + "SYNC": ANSIColors.RESET, +} @functools.cache @@ -41,17 +85,61 @@ def unbracket(s: str, including_content: bool = False) -> str: return s.translate(ZERO_WIDTH_TRANS) -def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: - r"""Decompose the input buffer into a printable variant. +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + for match in COLORIZE_RE.finditer(buffer): + yield from gen_color_spans(match) + + +def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]: + """Generate non-empty color spans.""" + for tag, data in re_match.groupdict().items(): + if not data: + continue + span = Span.from_re(re_match, tag) + tag = COLORIZE_GROUP_NAME_MAP.get(tag, tag) + yield ColorSpan(span, cast(ColorTag, tag)) + if data in IDENTIFIERS_AFTER: + if name_match := IDENTIFIER_RE.match(re_match.string, span.end + 1): + span = Span.from_re(name_match, 1) + yield ColorSpan(span, "DEFINITION") + + +def disp_str( + buffer: str, colors: list[ColorSpan] | None = None, start_index: int = 0 +) -> tuple[CharBuffer, CharWidths]: + r"""Decompose the input buffer into a printable variant with applied colors. 
Returns a tuple of two lists: - - the first list is the input buffer, character by character; + - the first list is the input buffer, character by character, with color + escape codes added (while those codes contain multiple ASCII characters, + each code is considered atomic *and is attached for the corresponding + visible character*); - the second list is the visible width of each character in the input buffer. + Note on colors: + - The `colors` list, if provided, is partially consumed within. We're using + a list and not a generator since we need to hold onto the current + unfinished span between calls to disp_str in case of multiline strings. + - The `colors` list is computed from the start of the input block. `buffer` + is only a subset of that input block, a single line within. This is why + we need `start_index` to inform us which position is the start of `buffer` + actually within user input. This allows us to match color spans correctly. + Examples: >>> utils.disp_str("a = 9") (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) + + >>> line = "while 1:" + >>> colors = list(utils.gen_colors(line)) + >>> utils.disp_str(line, colors=colors) + (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) + """ chars: CharBuffer = [] char_widths: CharWidths = [] @@ -59,7 +147,20 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: if not buffer: return chars, char_widths - for c in buffer: + while colors and colors[0].span.end < start_index: + # move past irrelevant spans + colors.pop(0) + + pre_color = "" + post_color = "" + if colors and colors[0].span.start < start_index: + # looks like we're continuing a previous color (e.g. 
a multiline str) + pre_color = TAG_TO_ANSI[colors[0].tag] + + for i, c in enumerate(buffer, start_index): + if colors and colors[0].span.start == i: # new color starts now + pre_color = TAG_TO_ANSI[colors[0].tag] + if c == "\x1a": # CTRL-Z on Windows chars.append(c) char_widths.append(2) @@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: else: chars.append(c) char_widths.append(str_width(c)) + + if colors and colors[0].span.end == i: # current color ends now + post_color = TAG_TO_ANSI["SYNC"] + colors.pop(0) + + chars[-1] = pre_color + chars[-1] + post_color + pre_color = "" + post_color = "" + + if colors and colors[0].span.start < i and colors[0].span.end > i: + # even though the current color should be continued, reset it for now. + # the next call to `disp_str()` will revive it. + chars[-1] += TAG_TO_ANSI["SYNC"] + trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 109cb603ae88b6..9ad2ffe5d1685c 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -1,14 +1,20 @@ import itertools import functools import rlcompleter +from textwrap import dedent from unittest import TestCase from unittest.mock import MagicMock from .support import handle_all_events, handle_events_narrow_console from .support import ScreenEqualMixin, code_to_events -from .support import prepare_reader, prepare_console +from .support import prepare_reader, prepare_console, reader_force_colors from _pyrepl.console import Event from _pyrepl.reader import Reader +from _pyrepl.utils import TAG_TO_ANSI + + +colors = {k[0].lower(): v for k, v in TAG_TO_ANSI.items() if k != "SYNC"} +colors["z"] = TAG_TO_ANSI["SYNC"] class TestReader(ScreenEqualMixin, TestCase): @@ -123,8 +129,9 @@ def test_setpos_for_xy_simple(self): def test_control_characters(self): code = 'flag = "🏳️‍🌈"' events = 
code_to_events(code) - reader, _ = handle_all_events(events) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True) + self.assert_screen_equal(reader, 'flag = {s}"🏳️\\u200d🌈"{z}'.format(**colors)) def test_setpos_from_xy_multiple_lines(self): # fmt: off @@ -355,3 +362,60 @@ def test_setpos_from_xy_for_non_printing_char(self): reader, _ = handle_all_events(events) reader.setpos_from_xy(8, 0) self.assertEqual(reader.pos, 7) + + def test_syntax_highlighting_basic(self): + code = dedent( + """\ + import re, sys + def funct(case: str = sys.platform) -> None: + match = re.search( + "(me)", + ''' + Come on + Come on now + You know that it's time to emerge + ''', + ) + match case: + case "emscripten": print("on the web") + case "ios" | "android": print("on the phone") + case _: print('arms around', match.group(1)) + """ + ) + expected = dedent( + """\ + {k}import{z} re, sys + {a}{k}def{z} {d}funct{z}(case: {b}str{z} = sys.platform) -> {k}None{z}: + match = re.search( + {s}"(me)"{z}, + {s}'''{z} + {s} Come on{z} + {s} Come on now{z} + {s} You know that it's time to emerge{z} + {s} '''{z}, + ) + {k}match{z} case: + {k}case{z} {s}"emscripten"{z}: {b}print{z}({s}"on the web"{z}) + {k}case{z} {s}"ios"{z} | {s}"android"{z}: {b}print{z}({s}"on the phone"{z}) + {k}case{z} {k}_{z}: {b}print{z}({s}'arms around'{z}, match.group(1)) + """ + ) + expected_sync = expected.format(a="", **colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected_sync) + self.assertEqual(reader.pos, 2**7 + 2**8) + self.assertEqual(reader.cxy, (0, 14)) + + async_msg = "{k}async{z} ".format(**colors) + expected_async = expected.format(a=async_msg, **colors) + more_events = itertools.chain( + code_to_events(code), + [Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))] 
* 13, + code_to_events("async "), + ) + reader, _ = handle_all_events(more_events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, expected_async) + self.assertEqual(reader.pos, 21) + self.assertEqual(reader.cxy, (6, 1)) From fb95911d044b4ed85ea244868f133f4a7d908263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 21 Mar 2025 19:03:47 +0100 Subject: [PATCH 02/22] Add Blurb --- .../2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst new file mode 100644 index 00000000000000..354a116c53371b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst @@ -0,0 +1 @@ +PyREPL now supports syntax highlighting. Contributed by Łukasz Langa. 
From b428513fbab0b8513ec58f7ab1cf14633669f5dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 21 Mar 2025 20:34:01 +0100 Subject: [PATCH 03/22] Fix irrelevant Windows tests --- Lib/test/test_pyrepl/test_windows_console.py | 23 +++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index 69f2d5af2a4dce..9d04bcff80b64e 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -11,7 +11,8 @@ from unittest import TestCase from unittest.mock import MagicMock, call -from .support import handle_all_events, code_to_events +from .support import handle_all_events, code_to_events, reader_no_colors +from .support import prepare_reader as default_prepare_reader try: from _pyrepl.console import Event, Console @@ -47,14 +48,22 @@ def console(self, events, **kwargs) -> Console: setattr(console, key, val) return console - def handle_events(self, events: Iterable[Event], **kwargs): - return handle_all_events(events, partial(self.console, **kwargs)) + def handle_events( + self, + events: Iterable[Event], + prepare_console=None, + prepare_reader=None, + **kwargs, + ): + prepare_console = prepare_console or partial(self.console, **kwargs) + prepare_reader = prepare_reader or default_prepare_reader + return handle_all_events(events, prepare_console, prepare_reader) def handle_events_narrow(self, events): return self.handle_events(events, width=5) - def handle_events_short(self, events): - return self.handle_events(events, height=1) + def handle_events_short(self, events, **kwargs): + return self.handle_events(events, height=1, **kwargs) def handle_events_height_3(self, events): return self.handle_events(events, height=3) @@ -249,7 +258,9 @@ def test_resize_bigger_on_multiline_function(self): # fmt: on events = itertools.chain(code_to_events(code)) - reader, console = 
self.handle_events_short(events) + reader, console = self.handle_events_short( + events, prepare_reader=reader_no_colors + ) console.height = 2 console.getheightwidth = MagicMock(lambda _: (2, 80)) From 2bdcd06c37c5364aa53b9fb334748fdda52a0983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 29 Apr 2025 17:48:57 +0200 Subject: [PATCH 04/22] Replace idlelib.colorizer with a faster solution --- Lib/_pyrepl/reader.py | 1 + Lib/_pyrepl/utils.py | 140 +++++++++++++++++++++++++--- Lib/test/test_pyrepl/test_reader.py | 104 ++++++++++++++++++--- 3 files changed, 221 insertions(+), 24 deletions(-) diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index b168c0aa427869..a7a8e448236419 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -320,6 +320,7 @@ def calc_screen(self) -> list[str]: colors = list(gen_colors(self.get_unicode())) else: colors = None + trace("colors = {colors}", colors=colors) lines = "".join(self.buffer[offset:]).split("\n") cursor_found = False lines_beyond_cursor = 0 diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 8b57c05368ec00..ccdc463a4bfd43 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -1,9 +1,13 @@ +import builtins +import functools +import keyword import re +import token as T +import tokenize import unicodedata -import functools -from idlelib import colorizer -from typing import cast, Iterator, Literal, Match, NamedTuple, Pattern, Self +from io import StringIO +from typing import cast, Iterator, Literal, Match, NamedTuple, Self from _colorize import ANSIColors from .types import CharBuffer, CharWidths @@ -12,17 +16,19 @@ ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) -COLORIZE_RE: Pattern[str] = colorizer.prog -IDENTIFIER_RE: Pattern[str] = colorizer.idprog IDENTIFIERS_AFTER = {"def", "class"} -COLORIZE_GROUP_NAME_MAP: dict[str, str] = 
colorizer.prog_group_name_to_tag +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} + type ColorTag = ( Literal["KEYWORD"] | Literal["BUILTIN"] | Literal["COMMENT"] | Literal["STRING"] + | Literal["NUMBER"] + | Literal["OP"] | Literal["DEFINITION"] + | Literal["SOFT_KEYWORD"] | Literal["SYNC"] ) @@ -38,6 +44,13 @@ def from_re(cls, m: Match[str], group: int | str) -> Self: re_span = m.span(group) return cls(re_span[0], re_span[1] - 1) + @classmethod + def from_token(cls, token: tokenize.TokenInfo, line_len: list[int]) -> Self: + return cls( + line_len[token.start[0] - 1] + token.start[1], + line_len[token.end[0] - 1] + token.end[1] - 1, + ) + class ColorSpan(NamedTuple): span: Span @@ -49,7 +62,10 @@ class ColorSpan(NamedTuple): "BUILTIN": ANSIColors.CYAN, "COMMENT": ANSIColors.RED, "STRING": ANSIColors.GREEN, + "NUMBER": ANSIColors.YELLOW, + "OP": ANSIColors.RESET, "DEFINITION": ANSIColors.BOLD_WHITE, + "SOFT_KEYWORD": ANSIColors.BOLD_GREEN, # FIXME: change to RESET "SYNC": ANSIColors.RESET, } @@ -86,17 +102,19 @@ def unbracket(s: str, including_content: bool = False) -> str: def gen_colors(buffer: str) -> Iterator[ColorSpan]: - """Returns a list of index spans to color using the given color tag. - - The input `buffer` should be a valid start of a Python code block, i.e. - it cannot be a block starting in the middle of a multiline string. - """ + # FIXME: delete this previous version, now only kept for debugging. + from idlelib import colorizer + COLORIZE_RE = colorizer.prog for match in COLORIZE_RE.finditer(buffer): yield from gen_color_spans(match) def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]: - """Generate non-empty color spans.""" + # FIXME: delete this previous version, now only kept for debugging. 
+ from idlelib import colorizer + COLORIZE_GROUP_NAME_MAP = colorizer.prog_group_name_to_tag + IDENTIFIER_RE = colorizer.idprog + for tag, data in re_match.groupdict().items(): if not data: continue @@ -109,6 +127,104 @@ def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]: yield ColorSpan(span, "DEFINITION") +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + sio = StringIO(buffer) + line_lengths = [0] + [len(line) for line in sio.readlines()] + # make line_lengths cumulative + for i in range(1, len(line_lengths)): + line_lengths[i] += line_lengths[i-1] + + sio.seek(0) + gen = tokenize.generate_tokens(sio.readline) + last_emitted = None + try: + for color in gen_colors_from_token_stream(gen, line_lengths): + yield color + last_emitted = color + except tokenize.TokenError as te: + yield from recover_unterminated_string( + te, line_lengths, last_emitted, buffer + ) + + +def recover_unterminated_string( + exc: tokenize.TokenError, + line_lengths: list[int], + last_emitted: ColorTag | None, + buffer: str, +) -> Iterator[ColorSpan]: + msg, loc = exc.args + if ( + msg.startswith("unterminated string literal") + or msg.startswith("unterminated f-string literal") + or msg.startswith("EOF in multi-line string") + or msg.startswith("unterminated triple-quoted f-string literal") + ): + start = line_lengths[loc[0] - 1] + loc[1] - 1 + end = line_lengths[-1] - 1 + + # in case FSTRING_START was already emitted + if last_emitted and start <= last_emitted.span.start: + trace("before last emitted = {s}", s=start) + start = last_emitted.span.end + 1 + + span = Span(start, end) + trace("yielding span {a} -> {b}", a=span.start, b=span.end) + yield ColorSpan(span, "STRING") + else: + trace( + "unhandled token error({buffer}) = {te}", + 
buffer=repr(buffer), + te=str(exc), + ) + + +def gen_colors_from_token_stream( + token_generator: Iterator[tokenize.TokenInfo], + line_lengths: list[int], +) -> Iterator[ColorSpan]: + is_def_name = False + for token in token_generator: + if token.start == token.end: + continue + + match token.type: + case T.STRING | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "STRING") + case T.COMMENT: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "COMMENT") + case T.NUMBER: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "NUMBER") + case T.OP: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "OP") + case T.NAME: + if is_def_name: + is_def_name = False + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "DEFINITION") + elif keyword.iskeyword(token.string): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "KEYWORD") + if token.string in IDENTIFIERS_AFTER: + is_def_name = True + elif keyword.issoftkeyword(token.string): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "SOFT_KEYWORD") + elif token.string in BUILTINS: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "BUILTIN") + # TODO: soft keywords + + def disp_str( buffer: str, colors: list[ColorSpan] | None = None, start_index: int = 0 ) -> tuple[CharBuffer, CharWidths]: diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 9ad2ffe5d1685c..a504d169bdac30 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -13,8 +13,8 @@ from _pyrepl.utils import TAG_TO_ANSI -colors = {k[0].lower(): v for k, v in TAG_TO_ANSI.items() if k != "SYNC"} -colors["z"] = TAG_TO_ANSI["SYNC"] +overrides = {"SYNC": "z", "SOFT_KEYWORD": "K"} +colors = {overrides.get(k, k[0].lower()): v for k, v in TAG_TO_ANSI.items()} class TestReader(ScreenEqualMixin, 
TestCase): @@ -384,20 +384,20 @@ def funct(case: str = sys.platform) -> None: ) expected = dedent( """\ - {k}import{z} re, sys - {a}{k}def{z} {d}funct{z}(case: {b}str{z} = sys.platform) -> {k}None{z}: - match = re.search( - {s}"(me)"{z}, + {k}import{z} re{o},{z} sys + {a}{k}def{z} {d}funct{z}{o}({z}{K}case{z}{o}:{z} {b}str{z} {o}={z} sys{o}.{z}platform{o}){z} {o}->{z} {k}None{z}{o}:{z} + {K}match{z} {o}={z} re{o}.{z}search{o}({z} + {s}"(me)"{z}{o},{z} {s}'''{z} {s} Come on{z} {s} Come on now{z} {s} You know that it's time to emerge{z} - {s} '''{z}, - ) - {k}match{z} case: - {k}case{z} {s}"emscripten"{z}: {b}print{z}({s}"on the web"{z}) - {k}case{z} {s}"ios"{z} | {s}"android"{z}: {b}print{z}({s}"on the phone"{z}) - {k}case{z} {k}_{z}: {b}print{z}({s}'arms around'{z}, match.group(1)) + {s} '''{z}{o},{z} + {o}){z} + {K}match{z} {K}case{z}{o}:{z} + {K}case{z} {s}"emscripten"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the web"{z}{o}){z} + {K}case{z} {s}"ios"{z} {o}|{z} {s}"android"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the phone"{z}{o}){z} + {K}case{z} {K}_{z}{o}:{z} {b}print{z}{o}({z}{s}'arms around'{z}{o},{z} {K}match{z}{o}.{z}group{o}({z}{n}1{z}{o}){z}{o}){z} """ ) expected_sync = expected.format(a="", **colors) @@ -419,3 +419,83 @@ def funct(case: str = sys.platform) -> None: self.assert_screen_equal(reader, expected_async) self.assertEqual(reader.pos, 21) self.assertEqual(reader.cxy, (6, 1)) + + def test_syntax_highlighting_incomplete_string_first_line(self): + code = dedent( + """\ + def unfinished_function(arg: str = "still typing + """ + ) + expected = dedent( + """\ + {k}def{z} {d}unfinished_function{z}{o}({z}arg{o}:{z} {b}str{z} {o}={z} {s}"still typing{z} + """ + ).format(**colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected) + + def test_syntax_highlighting_incomplete_string_another_line(self): + code = dedent( 
+ """\ + def unfinished_function( + arg: str = "still typing + """ + ) + expected = dedent( + """\ + {k}def{z} {d}unfinished_function{z}{o}({z} + arg{o}:{z} {b}str{z} {o}={z} {s}"still typing{z} + """ + ).format(**colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected) + + def test_syntax_highlighting_incomplete_multiline_string(self): + code = dedent( + """\ + def unfinished_function(): + '''Still writing + the docstring + """ + ) + expected = dedent( + """\ + {k}def{z} {d}unfinished_function{z}{o}({z}{o}){z}{o}:{z} + {s}'''Still writing{z} + {s} the docstring{z} + """ + ).format(**colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected) + + def test_syntax_highlighting_incomplete_fstring(self): + code = dedent( + """\ + def unfinished_function(): + var = f"Single-quote but { + 1 + + + 1 + } multi-line! 
+ """ + ) + expected = dedent( + """\ + {k}def{z} {d}unfinished_function{z}{o}({z}{o}){z}{o}:{z} + var {o}={z} {s}f"{z}{s}Single-quote but {z}{o}{OB}{z} + {n}1{z} + {o}+{z} + {n}1{z} + {o}{CB}{z}{s} multi-line!{z} + """ + ).format(OB="{", CB="}", **colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected) \ No newline at end of file From 8c70c4587287dec51aa1756b0a16d3652a96b428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 29 Apr 2025 21:07:02 +0200 Subject: [PATCH 05/22] Slurp the entire input buffer before refreshing display during bracketed paste --- Lib/_pyrepl/commands.py | 27 ++++++++++++++++----------- Lib/_pyrepl/reader.py | 16 ++++------------ Lib/_pyrepl/readline.py | 4 ---- Lib/_pyrepl/simple_interact.py | 1 - Lib/_pyrepl/unix_console.py | 20 ++------------------ Lib/_pyrepl/utils.py | 2 +- 6 files changed, 23 insertions(+), 47 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index cbb6d85f683257..1f6a5e83cf6b4f 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -31,6 +31,7 @@ # finishing # [completion] +from .trace import trace # types if False: @@ -471,19 +472,23 @@ def do(self) -> None: class paste_mode(Command): - def do(self) -> None: self.reader.paste_mode = not self.reader.paste_mode self.reader.dirty = True -class enable_bracketed_paste(Command): - def do(self) -> None: - self.reader.paste_mode = True - self.reader.in_bracketed_paste = True - -class disable_bracketed_paste(Command): - def do(self) -> None: - self.reader.paste_mode = False - self.reader.in_bracketed_paste = False - self.reader.dirty = True +class perform_bracketed_paste(Command): + def do(self) -> None: + done = "\x1b[201~" + data = "" + import time + start = time.time() + trace("bracketed pasting starts") + while done not in data: + 
self.reader.console.wait(100) + ev = self.reader.console.getpending() + data += ev.data + trace("len(data) = {d}", d=len(data)) + trace("bracketed pasting done in {s:.2f}s", s=time.time() - start) + self.reader.insert(data.replace(done, "")) + self.reader.last_refresh_cache.invalidated = True diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index a7a8e448236419..cbb2fd7ee364fc 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -103,8 +103,7 @@ def make_default_commands() -> dict[CommandName, type[Command]]: (r"\M-9", "digit-arg"), (r"\M-\n", "accept"), ("\\\\", "self-insert"), - (r"\x1b[200~", "enable_bracketed_paste"), - (r"\x1b[201~", "disable_bracketed_paste"), + (r"\x1b[200~", "perform-bracketed-paste"), (r"\x03", "ctrl-c"), ] + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] @@ -202,7 +201,6 @@ class Reader: dirty: bool = False finished: bool = False paste_mode: bool = False - in_bracketed_paste: bool = False commands: dict[str, type[Command]] = field(default_factory=make_default_commands) last_command: type[Command] | None = None syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) @@ -220,7 +218,6 @@ class Reader: ## cached metadata to speed up screen refreshes @dataclass class RefreshCache: - in_bracketed_paste: bool = False screen: list[str] = field(default_factory=list) screeninfo: list[tuple[int, list[int]]] = field(init=False) line_end_offsets: list[int] = field(default_factory=list) @@ -234,7 +231,6 @@ def update_cache(self, screen: list[str], screeninfo: list[tuple[int, list[int]]], ) -> None: - self.in_bracketed_paste = reader.in_bracketed_paste self.screen = screen.copy() self.screeninfo = screeninfo.copy() self.pos = reader.pos @@ -247,8 +243,7 @@ def valid(self, reader: Reader) -> bool: return False dimensions = reader.console.width, reader.console.height dimensions_changed = dimensions != self.dimensions - paste_changed = reader.in_bracketed_paste != self.in_bracketed_paste - 
return not (dimensions_changed or paste_changed) + return not dimensions_changed def get_cached_location(self, reader: Reader) -> tuple[int, int]: if self.invalidated: @@ -350,7 +345,7 @@ def calc_screen(self) -> list[str]: prompt, prompt_len = self.process_prompt(prompt) chars, char_widths = disp_str(line, colors, offset) wrapcount = (sum(char_widths) + prompt_len) // self.console.width - trace("wrapcount = {wrapcount}", wrapcount=wrapcount) + # trace("wrapcount = {wrapcount}", wrapcount=wrapcount) if wrapcount == 0 or not char_widths: offset += line_len + 1 # Takes all of the line plus the newline last_refresh_line_end_offsets.append(offset) @@ -484,7 +479,7 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: 'lineno'.""" if self.arg is not None and cursor_on_line: prompt = f"(arg: {self.arg}) " - elif self.paste_mode and not self.in_bracketed_paste: + elif self.paste_mode: prompt = "(paste) " elif "\n" in self.buffer: if lineno == 0: @@ -639,9 +634,6 @@ def update_screen(self) -> None: def refresh(self) -> None: """Recalculate and refresh the screen.""" - if self.in_bracketed_paste and self.buffer and not self.buffer[-1] == "\n": - return - # this call sets up self.cxy, so call it first. self.screen = self.calc_screen() self.console.refresh(self.screen, self.cxy) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 9d58829faf11f0..560a9db192169e 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -276,10 +276,6 @@ def do(self) -> None: r = self.reader # type: ignore[assignment] r.dirty = True # this is needed to hide the completion menu, if visible - if self.reader.in_bracketed_paste: - r.insert("\n") - return - # if there are already several lines and the cursor # is not on the last one, always insert a new \n. 
text = r.get_unicode() diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index 4c74466118ba97..e2274629b651b9 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -157,7 +157,6 @@ def maybe_run_command(statement: str) -> bool: r.pos = len(r.get_unicode()) r.dirty = True r.refresh() - r.in_bracketed_paste = False console.write("\nKeyboardInterrupt\n") console.resetbuffer() except MemoryError: diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index 96379bc20f3357..07b160d23246df 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -150,8 +150,6 @@ def __init__( self.pollob = poll() self.pollob.register(self.input_fd, select.POLLIN) - self.input_buffer = b"" - self.input_buffer_pos = 0 curses.setupterm(term or None, self.output_fd) self.term = term @@ -199,22 +197,8 @@ def _my_getstr(cap: str, optional: bool = False) -> bytes | None: self.event_queue = EventQueue(self.input_fd, self.encoding) self.cursor_visible = 1 - def more_in_buffer(self) -> bool: - return bool( - self.input_buffer - and self.input_buffer_pos < len(self.input_buffer) - ) - def __read(self, n: int) -> bytes: - if not self.more_in_buffer(): - self.input_buffer = os.read(self.input_fd, 10000) - - ret = self.input_buffer[self.input_buffer_pos : self.input_buffer_pos + n] - self.input_buffer_pos += len(ret) - if self.input_buffer_pos >= len(self.input_buffer): - self.input_buffer = b"" - self.input_buffer_pos = 0 - return ret + return os.read(self.input_fd, n) def change_encoding(self, encoding: str) -> None: @@ -422,7 +406,6 @@ def wait(self, timeout: float | None = None) -> bool: """ return ( not self.event_queue.empty() - or self.more_in_buffer() or bool(self.pollob.poll(timeout)) ) @@ -525,6 +508,7 @@ def getpending(self): e.raw += e.raw amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] + trace("getpending({a})", a=amount) raw = self.__read(amount) data = str(raw, 
self.encoding, "replace") e.data += data diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index ccdc463a4bfd43..f8e641d0295dc1 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -304,5 +304,5 @@ def disp_str( # the next call to `disp_str()` will revive it. chars[-1] += TAG_TO_ANSI["SYNC"] - trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) + # trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths From 4d7ae3658260d973c3da4936131acd854043217c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 29 Apr 2025 21:26:46 +0200 Subject: [PATCH 06/22] Implement getpending() on Windows --- Lib/_pyrepl/windows_console.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 17942c8df0731a..74fd556a6a01bb 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -521,7 +521,17 @@ def forgetinput(self) -> None: def getpending(self) -> Event: """Return the characters that have been typed but not yet processed.""" - return Event("key", "", b"") + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + + rec = self._read_input(False) + if rec and rec.EventType == KEY_EVENT: + key_event = rec.Event.KeyEvent + e.data += key_event.uChar.UnicodeChar + return e def wait(self, timeout: float | None) -> bool: """Wait for an event.""" From 9585bd6bb271cfc5362ee162e43be70e1dc42f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 29 Apr 2025 22:29:14 +0200 Subject: [PATCH 07/22] Adapt tests --- Lib/test/test_pyrepl/test_pyrepl.py | 33 +++++++++++++++-------- Lib/test/test_pyrepl/test_reader.py | 5 ++-- Lib/test/test_pyrepl/test_unix_console.py | 4 ++- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py 
b/Lib/test/test_pyrepl/test_pyrepl.py index 75a5afad562ef2..93029ab6e080ba 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -45,6 +45,7 @@ def run_repl( cmdline_args: list[str] | None = None, cwd: str | None = None, skip: bool = False, + timeout: float = SHORT_TIMEOUT, ) -> tuple[str, int]: temp_dir = None if cwd is None: @@ -52,7 +53,12 @@ def run_repl( cwd = temp_dir.name try: return self._run_repl( - repl_input, env=env, cmdline_args=cmdline_args, cwd=cwd, skip=skip, + repl_input, + env=env, + cmdline_args=cmdline_args, + cwd=cwd, + skip=skip, + timeout=timeout, ) finally: if temp_dir is not None: @@ -66,6 +72,7 @@ def _run_repl( cmdline_args: list[str] | None, cwd: str, skip: bool, + timeout: float, ) -> tuple[str, int]: assert pty master_fd, slave_fd = pty.openpty() @@ -103,7 +110,7 @@ def _run_repl( os.write(master_fd, repl_input.encode("utf-8")) output = [] - while select.select([master_fd], [], [], SHORT_TIMEOUT)[0]: + while select.select([master_fd], [], [], timeout)[0]: try: data = os.read(master_fd, 1024).decode("utf-8") if not data: @@ -114,12 +121,12 @@ def _run_repl( else: os.close(master_fd) process.kill() - process.wait(timeout=SHORT_TIMEOUT) + process.wait(timeout=timeout) self.fail(f"Timeout while waiting for output, got: {''.join(output)}") os.close(master_fd) try: - exit_code = process.wait(timeout=SHORT_TIMEOUT) + exit_code = process.wait(timeout=timeout) except subprocess.TimeoutExpired: process.kill() exit_code = process.wait() @@ -1561,25 +1568,29 @@ def test_readline_history_file(self): def test_history_survive_crash(self): env = os.environ.copy() - commands = "1\nexit()\n" - output, exit_code = self.run_repl(commands, env=env, skip=True) with tempfile.NamedTemporaryFile() as hfile: env["PYTHON_HISTORY"] = hfile.name - commands = "spam\nimport time\ntime.sleep(1000)\npreved\n" + + commands = "1\n2\n3\nexit()\n" + output, exit_code = self.run_repl(commands, env=env, skip=True) + + commands = 
"spam\nimport time\ntime.sleep(1000)\nquit\n" try: - self.run_repl(commands, env=env) + self.run_repl(commands, env=env, timeout=3) except AssertionError: pass history = pathlib.Path(hfile.name).read_text() + self.assertIn("2", history) + self.assertIn("exit()", history) self.assertIn("spam", history) - self.assertIn("time", history) + self.assertIn("import time", history) self.assertNotIn("sleep", history) - self.assertNotIn("preved", history) + self.assertNotIn("quit", history) def test_keyboard_interrupt_after_isearch(self): - output, exit_code = self.run_repl(["\x12", "\x03", "exit"]) + output, exit_code = self.run_repl("\x12\x03exit\n") self.assertEqual(exit_code, 0) def test_prompt_after_help(self): diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index a504d169bdac30..fb8fabc0470381 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -7,7 +7,8 @@ from .support import handle_all_events, handle_events_narrow_console from .support import ScreenEqualMixin, code_to_events -from .support import prepare_reader, prepare_console, reader_force_colors +from .support import prepare_console, reader_force_colors +from .support import reader_no_colors as prepare_reader from _pyrepl.console import Event from _pyrepl.reader import Reader from _pyrepl.utils import TAG_TO_ANSI @@ -131,7 +132,7 @@ def test_control_characters(self): events = code_to_events(code) reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True) - self.assert_screen_equal(reader, 'flag = {s}"🏳️\\u200d🌈"{z}'.format(**colors)) + self.assert_screen_equal(reader, 'flag {o}={z} {s}"🏳️\\u200d🌈"{z}'.format(**colors)) def test_setpos_from_xy_multiple_lines(self): # fmt: off diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index 2f5c150402b8f4..7acb84a94f7224 100644 --- 
a/Lib/test/test_pyrepl/test_unix_console.py +++ b/Lib/test/test_pyrepl/test_unix_console.py @@ -33,10 +33,12 @@ def unix_console(events, **kwargs): handle_events_unix_console = partial( handle_all_events, - prepare_console=partial(unix_console), + prepare_reader=reader_no_colors, + prepare_console=unix_console, ) handle_events_narrow_unix_console = partial( handle_all_events, + prepare_reader=reader_no_colors, prepare_console=partial(unix_console, width=5), ) handle_events_short_unix_console = partial( From b1f25575410086cd7fff0ae4881c5b3b360006a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 00:03:30 +0200 Subject: [PATCH 08/22] Support soft keywords (fight fire with fire) --- Lib/_pyrepl/utils.py | 130 ++++++++++++++++++++++++--------- Lib/_pyrepl/windows_console.py | 3 +- 2 files changed, 96 insertions(+), 37 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index f8e641d0295dc1..c7b19fa681af5b 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -6,8 +6,10 @@ import tokenize import unicodedata +from collections import deque from io import StringIO -from typing import cast, Iterator, Literal, Match, NamedTuple, Self +from tokenize import TokenInfo as TI +from typing import TYPE_CHECKING, Iterable, Iterator, Literal, Match, NamedTuple, Self from _colorize import ANSIColors from .types import CharBuffer, CharWidths @@ -19,7 +21,6 @@ IDENTIFIERS_AFTER = {"def", "class"} BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} - type ColorTag = ( Literal["KEYWORD"] | Literal["BUILTIN"] @@ -45,7 +46,7 @@ def from_re(cls, m: Match[str], group: int | str) -> Self: return cls(re_span[0], re_span[1] - 1) @classmethod - def from_token(cls, token: tokenize.TokenInfo, line_len: list[int]) -> Self: + def from_token(cls, token: TI, line_len: list[int]) -> Self: return cls( line_len[token.start[0] - 1] + token.start[1], line_len[token.end[0] - 1] + token.end[1] - 1, @@ -65,7 +66,7 @@ 
class ColorSpan(NamedTuple): "NUMBER": ANSIColors.YELLOW, "OP": ANSIColors.RESET, "DEFINITION": ANSIColors.BOLD_WHITE, - "SOFT_KEYWORD": ANSIColors.BOLD_GREEN, # FIXME: change to RESET + "SOFT_KEYWORD": ANSIColors.BOLD_BLUE, "SYNC": ANSIColors.RESET, } @@ -101,32 +102,6 @@ def unbracket(s: str, including_content: bool = False) -> str: return s.translate(ZERO_WIDTH_TRANS) -def gen_colors(buffer: str) -> Iterator[ColorSpan]: - # FIXME: delete this previous version, now only kept for debugging. - from idlelib import colorizer - COLORIZE_RE = colorizer.prog - for match in COLORIZE_RE.finditer(buffer): - yield from gen_color_spans(match) - - -def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]: - # FIXME: delete this previous version, now only kept for debugging. - from idlelib import colorizer - COLORIZE_GROUP_NAME_MAP = colorizer.prog_group_name_to_tag - IDENTIFIER_RE = colorizer.idprog - - for tag, data in re_match.groupdict().items(): - if not data: - continue - span = Span.from_re(re_match, tag) - tag = COLORIZE_GROUP_NAME_MAP.get(tag, tag) - yield ColorSpan(span, cast(ColorTag, tag)) - if data in IDENTIFIERS_AFTER: - if name_match := IDENTIFIER_RE.match(re_match.string, span.end + 1): - span = Span.from_re(name_match, 1) - yield ColorSpan(span, "DEFINITION") - - def gen_colors(buffer: str) -> Iterator[ColorSpan]: """Returns a list of index spans to color using the given color tag. 
@@ -141,7 +116,7 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: sio.seek(0) gen = tokenize.generate_tokens(sio.readline) - last_emitted = None + last_emitted: ColorSpan | None = None try: for color in gen_colors_from_token_stream(gen, line_lengths): yield color @@ -155,7 +130,7 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: def recover_unterminated_string( exc: tokenize.TokenError, line_lengths: list[int], - last_emitted: ColorTag | None, + last_emitted: ColorSpan | None, buffer: str, ) -> Iterator[ColorSpan]: msg, loc = exc.args @@ -185,11 +160,15 @@ def recover_unterminated_string( def gen_colors_from_token_stream( - token_generator: Iterator[tokenize.TokenInfo], + token_generator: Iterator[TI], line_lengths: list[int], ) -> Iterator[ColorSpan]: + token_window = prev_next_window(token_generator) + is_def_name = False - for token in token_generator: + paren_level = 0 + for prev_token, token, next_token in token_window: + assert token is not None if token.start == token.end: continue @@ -204,6 +183,10 @@ def gen_colors_from_token_stream( span = Span.from_token(token, line_lengths) yield ColorSpan(span, "NUMBER") case T.OP: + if token.string == "(": + paren_level += 1 + elif token.string == ")": + paren_level -= 1 span = Span.from_token(token, line_lengths) yield ColorSpan(span, "OP") case T.NAME: @@ -216,13 +199,64 @@ def gen_colors_from_token_stream( yield ColorSpan(span, "KEYWORD") if token.string in IDENTIFIERS_AFTER: is_def_name = True - elif keyword.issoftkeyword(token.string): + elif ( + keyword.issoftkeyword(token.string) + and paren_level == 0 + and is_soft_keyword_used(prev_token, token, next_token) + ): span = Span.from_token(token, line_lengths) yield ColorSpan(span, "SOFT_KEYWORD") elif token.string in BUILTINS: span = Span.from_token(token, line_lengths) yield ColorSpan(span, "BUILTIN") - # TODO: soft keywords + + +keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} +keyword_first_sets_case = {"False", "None", 
"True"} + + +def is_soft_keyword_used(*tokens: TI | None) -> bool: + """Returns True if the current token is a keyword in this context. + + For the `*tokens` to match anything, they have to be a three-tuple of + (previous, current, next). + """ + trace("is_soft_keyword_used{t}", t=tokens) + match tokens: + case ( + None | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NUMBER | T.STRING | T.FSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "+" | "[" | "{" | "~" | "...") + ): + return True + case ( + None | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_match + return True + case ( + None | TI(T.INDENT) | TI(string=":"), + TI(string="case"), + TI(T.NUMBER | T.STRING | T.FSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "[" | "{") + ): + return True + case ( + None | TI(T.INDENT) | TI(string=":"), + TI(string="case"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_case + return True + case (TI(string="case"), TI(string="_"), TI(string=":")): + return True + case _: + return False def disp_str( @@ -306,3 +340,27 @@ def disp_str( # trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths + + +def prev_next_window[T]( + iterable: Iterable[T] +) -> Iterator[tuple[T | None, ...]]: + """Generates three-tuples of (previous, current, next) items. + + On the first iteration previous is None. On the last iteration next + is None. In case of exception next is None and the exception is re-raised + on a subsequent next() call. + + Inspired by `sliding_window` from `itertools` recipes. 
+ """ + + iterator = iter(iterable) + window = deque((None, next(iterator)), maxlen=3) + try: + for x in iterator: + window.append(x) + yield tuple(window) + except Exception: + window.append(None) + yield tuple(window) + raise \ No newline at end of file diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 74fd556a6a01bb..923b5b69cffa42 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -525,7 +525,8 @@ def getpending(self) -> Event: while not self.event_queue.empty(): e2 = self.event_queue.get() - e.data += e2.data + if e2: + e.data += e2.data rec = self._read_input(False) if rec and rec.EventType == KEY_EVENT: From 01e1129886a1d9b55f9649abe990fea594d35d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 00:11:38 +0200 Subject: [PATCH 09/22] Fix test --- Lib/_pyrepl/utils.py | 8 ++++---- Lib/test/test_pyrepl/test_reader.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index c7b19fa681af5b..62c3e8ca3386c4 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -224,14 +224,14 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: trace("is_soft_keyword_used{t}", t=tokens) match tokens: case ( - None | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="match"), TI(T.NUMBER | T.STRING | T.FSTRING_START) | TI(T.OP, string="(" | "*" | "-" | "+" | "[" | "{" | "~" | "...") ): return True case ( - None | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="match"), TI(T.NAME, string=s) ): @@ -239,14 +239,14 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: return s in keyword_first_sets_match return True case ( - None | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="case"), TI(T.NUMBER | T.STRING | T.FSTRING_START) | TI(T.OP, string="(" | "*" | 
"-" | "[" | "{") ): return True case ( - None | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="case"), TI(T.NAME, string=s) ): diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index fb8fabc0470381..3e36fe23c88741 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -386,8 +386,8 @@ def funct(case: str = sys.platform) -> None: expected = dedent( """\ {k}import{z} re{o},{z} sys - {a}{k}def{z} {d}funct{z}{o}({z}{K}case{z}{o}:{z} {b}str{z} {o}={z} sys{o}.{z}platform{o}){z} {o}->{z} {k}None{z}{o}:{z} - {K}match{z} {o}={z} re{o}.{z}search{o}({z} + {a}{k}def{z} {d}funct{z}{o}({z}case{o}:{z} {b}str{z} {o}={z} sys{o}.{z}platform{o}){z} {o}->{z} {k}None{z}{o}:{z} + match {o}={z} re{o}.{z}search{o}({z} {s}"(me)"{z}{o},{z} {s}'''{z} {s} Come on{z} @@ -395,10 +395,10 @@ def funct(case: str = sys.platform) -> None: {s} You know that it's time to emerge{z} {s} '''{z}{o},{z} {o}){z} - {K}match{z} {K}case{z}{o}:{z} + {K}match{z} case{o}:{z} {K}case{z} {s}"emscripten"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the web"{z}{o}){z} {K}case{z} {s}"ios"{z} {o}|{z} {s}"android"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the phone"{z}{o}){z} - {K}case{z} {K}_{z}{o}:{z} {b}print{z}{o}({z}{s}'arms around'{z}{o},{z} {K}match{z}{o}.{z}group{o}({z}{n}1{z}{o}){z}{o}){z} + {K}case{z} {K}_{z}{o}:{z} {b}print{z}{o}({z}{s}'arms around'{z}{o},{z} match{o}.{z}group{o}({z}{n}1{z}{o}){z}{o}){z} """ ) expected_sync = expected.format(a="", **colors) From 20eff49682075da3862ae933c4a06e8923122219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 00:21:43 +0200 Subject: [PATCH 10/22] Remove unnecessary import --- Lib/_pyrepl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 62c3e8ca3386c4..71ab8fb1740642 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -9,7 +9,7 @@ from 
collections import deque from io import StringIO from tokenize import TokenInfo as TI -from typing import TYPE_CHECKING, Iterable, Iterator, Literal, Match, NamedTuple, Self +from typing import Iterable, Iterator, Literal, Match, NamedTuple, Self from _colorize import ANSIColors from .types import CharBuffer, CharWidths From 8d3648a080dde06083c0f4bedcf286e6df8c3042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 00:33:37 +0200 Subject: [PATCH 11/22] Add test for prev_next_window --- Lib/_pyrepl/utils.py | 3 ++- Lib/test/test_pyrepl/test_utils.py | 37 +++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 71ab8fb1740642..41d5a369c7878a 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -361,6 +361,7 @@ def prev_next_window[T]( window.append(x) yield tuple(window) except Exception: + raise + finally: window.append(None) yield tuple(window) - raise \ No newline at end of file diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 0d59968206a613..959a3bbea1ef14 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -1,6 +1,6 @@ from unittest import TestCase -from _pyrepl.utils import str_width, wlen +from _pyrepl.utils import str_width, wlen, prev_next_window class TestUtils(TestCase): @@ -25,3 +25,38 @@ def test_wlen(self): self.assertEqual(wlen('hello'), 5) self.assertEqual(wlen('hello' + '\x1a'), 7) + + def test_prev_next_window(self): + def gen_normal(): + yield 1 + yield 2 + yield 3 + yield 4 + + pnw = prev_next_window(gen_normal()) + self.assertEqual(next(pnw), (None, 1, 2)) + self.assertEqual(next(pnw), (1, 2, 3)) + self.assertEqual(next(pnw), (2, 3, 4)) + self.assertEqual(next(pnw), (3, 4, None)) + with self.assertRaises(StopIteration): + next(pnw) + + def gen_short(): + yield 1 + + pnw = prev_next_window(gen_short()) + self.assertEqual(next(pnw), 
(None, 1, None)) + with self.assertRaises(StopIteration): + next(pnw) + + def gen_raise(): + yield from gen_normal() + 1/0 + + pnw = prev_next_window(gen_raise()) + self.assertEqual(next(pnw), (None, 1, 2)) + self.assertEqual(next(pnw), (1, 2, 3)) + self.assertEqual(next(pnw), (2, 3, 4)) + self.assertEqual(next(pnw), (3, 4, None)) + with self.assertRaises(ZeroDivisionError): + next(pnw) From 656fea32bc4696f8d741aa818a182e1a07c7f7ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 10:47:56 +0100 Subject: [PATCH 12/22] =?UTF-8?q?Windows:=20bracketed=20pasting=20of=20448?= =?UTF-8?q?692=20chars=20done=20in=202.38s=20=E2=9C=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/_pyrepl/commands.py | 7 +++++-- Lib/_pyrepl/windows_console.py | 25 +++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 1f6a5e83cf6b4f..4d80e503502f3f 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -488,7 +488,10 @@ def do(self) -> None: self.reader.console.wait(100) ev = self.reader.console.getpending() data += ev.data - trace("len(data) = {d}", d=len(data)) - trace("bracketed pasting done in {s:.2f}s", s=time.time() - start) + trace( + "bracketed pasting of {l} chars done in {s:.2f}s", + l=len(data), + s=time.time() - start, + ) self.reader.insert(data.replace(done, "")) self.reader.last_refresh_cache.invalidated = True diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 923b5b69cffa42..7bcdac8167c800 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -426,6 +426,18 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec + def _read_input_bulk(self, block: bool, n: int) -> tuple[INPUT_RECORD, int]: + rec = (n * INPUT_RECORD)() + read = DWORD() + + if not block and not self.wait(timeout=0): + return rec, 0 + + 
if not ReadConsoleInput(InHandle, rec, n, read): + raise WinError(GetLastError()) + + return rec, read.value + def get_event(self, block: bool = True) -> Event | None: """Return an Event instance. Returns None if |block| is false and there is no event pending, otherwise waits for the @@ -528,10 +540,15 @@ def getpending(self) -> Event: if e2: e.data += e2.data - rec = self._read_input(False) - if rec and rec.EventType == KEY_EVENT: - key_event = rec.Event.KeyEvent - e.data += key_event.uChar.UnicodeChar + recs, rec_count = self._read_input_bulk(False, 1024) + for i in range(rec_count): + rec = recs[i] + if rec and rec.EventType == KEY_EVENT: + key_event = rec.Event.KeyEvent + ch = key_event.uChar.UnicodeChar + if ch == "\r": + ch += "\n" + e.data += ch return e def wait(self, timeout: float | None) -> bool: From dac896144c163a5308108af0faa4c62d498d76d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 11:58:37 +0200 Subject: [PATCH 13/22] Remove colors from Windows low-level console tests --- Lib/test/test_pyrepl/test_windows_console.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index 9d04bcff80b64e..e95fec46a851ee 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -11,8 +11,8 @@ from unittest import TestCase from unittest.mock import MagicMock, call -from .support import handle_all_events, code_to_events, reader_no_colors -from .support import prepare_reader as default_prepare_reader +from .support import handle_all_events, code_to_events +from .support import reader_no_colors as default_prepare_reader try: from _pyrepl.console import Event, Console @@ -258,9 +258,7 @@ def test_resize_bigger_on_multiline_function(self): # fmt: on events = itertools.chain(code_to_events(code)) - reader, console = self.handle_events_short( - events, 
prepare_reader=reader_no_colors - ) + reader, console = self.handle_events_short(events) console.height = 2 console.getheightwidth = MagicMock(lambda _: (2, 80)) From 7891fa7937e62728458f6a06f49acaec94e51310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 12:02:51 +0200 Subject: [PATCH 14/22] Fix lint and stuff --- Lib/_pyrepl/windows_console.py | 4 +++- Lib/test/test_pyrepl/test_reader.py | 2 +- Lib/test/test_pyrepl/test_utils.py | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 7bcdac8167c800..77985e59a93249 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -426,7 +426,9 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec - def _read_input_bulk(self, block: bool, n: int) -> tuple[INPUT_RECORD, int]: + def _read_input_bulk( + self, block: bool, n: int + ) -> tuple[ctypes.Array[INPUT_RECORD], int]: rec = (n * INPUT_RECORD)() read = DWORD() diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 3e36fe23c88741..3aa557b77042f0 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -499,4 +499,4 @@ def unfinished_function(): events = code_to_events(code) reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) self.assert_screen_equal(reader, code, clean=True) - self.assert_screen_equal(reader, expected) \ No newline at end of file + self.assert_screen_equal(reader, expected) diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 959a3bbea1ef14..8ce1e5371386f0 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -32,7 +32,7 @@ def gen_normal(): yield 2 yield 3 yield 4 - + pnw = prev_next_window(gen_normal()) self.assertEqual(next(pnw), (None, 1, 2)) self.assertEqual(next(pnw), (1, 2, 3)) @@ -43,7 +43,7 @@ def 
gen_normal(): def gen_short(): yield 1 - + pnw = prev_next_window(gen_short()) self.assertEqual(next(pnw), (None, 1, None)) with self.assertRaises(StopIteration): @@ -52,7 +52,7 @@ def gen_short(): def gen_raise(): yield from gen_normal() 1/0 - + pnw = prev_next_window(gen_raise()) self.assertEqual(next(pnw), (None, 1, 2)) self.assertEqual(next(pnw), (1, 2, 3)) From 362a21b324e6b90247e898aa00b83c9323aafbb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 12:46:23 +0200 Subject: [PATCH 15/22] Add experimental theming support for syntax highlighting and the prompt --- Doc/whatsnew/3.14.rst | 17 ++++++++++++ Lib/_colorize.py | 42 ++++++++++++++++++++++++++++- Lib/_pyrepl/reader.py | 10 ++++--- Lib/_pyrepl/utils.py | 40 ++++++--------------------- Lib/test/test_pyrepl/test_reader.py | 6 ++--- 5 files changed, 76 insertions(+), 39 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 646a0b4007fc05..b3cacdfc5d7e38 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -485,6 +485,23 @@ For further information on how to build Python, see (Contributed by Ken Jin in :gh:`128563`, with ideas on how to implement this in CPython by Mark Shannon, Garrett Gu, Haoran Xu, and Josh Haberman.) +Syntax highlighting in PyREPL +----------------------------- + +The default :term:`interactive` shell now highlights Python syntax as you +type. The feature is enabled by default unless the +:envvar:`PYTHON_BASIC_REPL` environment is set or any color-disabling +environment variables are used. See :ref:`using-on-controlling-color` for +details. + +The default color theme for syntax highlighting strives for good contrast +and uses exclusively the 4-bit VGA standard ANSI color codes for maximum +compatibility. The theme can be customized using an experimental API +``_colorize.set_theme()``. This can be called interactively, as well as +in the :envvar:`PYTHONSTARTUP` script. + +(Contributed by Łukasz Langa in :gh:`131507`.) 
+ Other language changes ====================== diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 9eb6f0933b8150..c421b8bd07dab6 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -7,7 +7,22 @@ # types if False: - from typing import IO + from typing import IO, Literal + + type ColorTag = ( + Literal["PROMPT"] + | Literal["KEYWORD"] + | Literal["BUILTIN"] + | Literal["COMMENT"] + | Literal["STRING"] + | Literal["NUMBER"] + | Literal["OP"] + | Literal["DEFINITION"] + | Literal["SOFT_KEYWORD"] + | Literal["RESET"] + ) + + theme: dict[ColorTag, str] class ANSIColors: @@ -110,3 +125,28 @@ def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: return os.isatty(file.fileno()) except io.UnsupportedOperation: return hasattr(file, "isatty") and file.isatty() + + +def set_theme(t: dict[ColorTag, str] | None = None) -> None: + global theme + + if t: + theme = t + return + + colors = get_colors() + theme = { + "PROMPT": colors.BOLD_MAGENTA, + "KEYWORD": colors.BOLD_BLUE, + "BUILTIN": colors.CYAN, + "COMMENT": colors.RED, + "STRING": colors.GREEN, + "NUMBER": colors.YELLOW, + "OP": colors.RESET, + "DEFINITION": colors.BOLD_WHITE, + "SOFT_KEYWORD": colors.BOLD_BLUE, + "RESET": colors.RESET, + } + + +set_theme() \ No newline at end of file diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index cbb2fd7ee364fc..52504e17446dc5 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -22,10 +22,10 @@ from __future__ import annotations import sys +import _colorize from contextlib import contextmanager from dataclasses import dataclass, field, fields -from _colorize import can_colorize, ANSIColors from . 
import commands, console, input from .utils import wlen, unbracket, disp_str, gen_colors @@ -273,7 +273,7 @@ def __post_init__(self) -> None: self.screeninfo = [(0, [])] self.cxy = self.pos2xy() self.lxy = (self.pos, 0) - self.can_colorize = can_colorize() + self.can_colorize = _colorize.can_colorize() self.last_refresh_cache.screeninfo = self.screeninfo self.last_refresh_cache.pos = self.pos @@ -492,7 +492,11 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: prompt = self.ps1 if self.can_colorize: - prompt = f"{ANSIColors.BOLD_MAGENTA}{prompt}{ANSIColors.RESET}" + prompt = ( + f"{_colorize.theme["PROMPT"]}" + f"{prompt}" + f"{_colorize.theme["RESET"]}" + ) return prompt def push_input_trans(self, itrans: input.KeymapTranslator) -> None: diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 41d5a369c7878a..2d921083267b5f 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -1,3 +1,4 @@ +from __future__ import annotations import builtins import functools import keyword @@ -5,12 +6,12 @@ import token as T import tokenize import unicodedata +import _colorize from collections import deque from io import StringIO from tokenize import TokenInfo as TI -from typing import Iterable, Iterator, Literal, Match, NamedTuple, Self -from _colorize import ANSIColors +from typing import Iterable, Iterator, Match, NamedTuple, Self from .types import CharBuffer, CharWidths from .trace import trace @@ -21,18 +22,6 @@ IDENTIFIERS_AFTER = {"def", "class"} BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} -type ColorTag = ( - Literal["KEYWORD"] - | Literal["BUILTIN"] - | Literal["COMMENT"] - | Literal["STRING"] - | Literal["NUMBER"] - | Literal["OP"] - | Literal["DEFINITION"] - | Literal["SOFT_KEYWORD"] - | Literal["SYNC"] -) - class Span(NamedTuple): """Span indexing that's inclusive on both ends.""" @@ -55,20 +44,7 @@ def from_token(cls, token: TI, line_len: list[int]) -> Self: class ColorSpan(NamedTuple): span: Span - tag: 
ColorTag - - -TAG_TO_ANSI: dict[ColorTag, str] = { - "KEYWORD": ANSIColors.BOLD_BLUE, - "BUILTIN": ANSIColors.CYAN, - "COMMENT": ANSIColors.RED, - "STRING": ANSIColors.GREEN, - "NUMBER": ANSIColors.YELLOW, - "OP": ANSIColors.RESET, - "DEFINITION": ANSIColors.BOLD_WHITE, - "SOFT_KEYWORD": ANSIColors.BOLD_BLUE, - "SYNC": ANSIColors.RESET, -} + tag: _colorize.ColorTag @functools.cache @@ -305,11 +281,11 @@ def disp_str( post_color = "" if colors and colors[0].span.start < start_index: # looks like we're continuing a previous color (e.g. a multiline str) - pre_color = TAG_TO_ANSI[colors[0].tag] + pre_color = _colorize.theme[colors[0].tag] for i, c in enumerate(buffer, start_index): if colors and colors[0].span.start == i: # new color starts now - pre_color = TAG_TO_ANSI[colors[0].tag] + pre_color = _colorize.theme[colors[0].tag] if c == "\x1a": # CTRL-Z on Windows chars.append(c) @@ -326,7 +302,7 @@ def disp_str( char_widths.append(str_width(c)) if colors and colors[0].span.end == i: # current color ends now - post_color = TAG_TO_ANSI["SYNC"] + post_color = _colorize.theme["RESET"] colors.pop(0) chars[-1] = pre_color + chars[-1] + post_color @@ -336,7 +312,7 @@ def disp_str( if colors and colors[0].span.start < i and colors[0].span.end > i: # even though the current color should be continued, reset it for now. # the next call to `disp_str()` will revive it. 
- chars[-1] += TAG_TO_ANSI["SYNC"] + chars[-1] += _colorize.theme["RESET"] # trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 3aa557b77042f0..8d7fcf538d2064 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -11,11 +11,11 @@ from .support import reader_no_colors as prepare_reader from _pyrepl.console import Event from _pyrepl.reader import Reader -from _pyrepl.utils import TAG_TO_ANSI +from _colorize import theme -overrides = {"SYNC": "z", "SOFT_KEYWORD": "K"} -colors = {overrides.get(k, k[0].lower()): v for k, v in TAG_TO_ANSI.items()} +overrides = {"RESET": "z", "SOFT_KEYWORD": "K"} +colors = {overrides.get(k, k[0].lower()): v for k, v in theme.items()} class TestReader(ScreenEqualMixin, TestCase): From ffebbbe3d2df946daf1fd68fd0b7800cb9201a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 13:03:21 +0200 Subject: [PATCH 16/22] Fix lint --- Lib/_colorize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_colorize.py b/Lib/_colorize.py index c421b8bd07dab6..8241e642a09b3d 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -149,4 +149,4 @@ def set_theme(t: dict[ColorTag, str] | None = None) -> None: } -set_theme() \ No newline at end of file +set_theme() From 9b603826f6063b1deb534fc169f9170be5fe3f84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Thu, 1 May 2025 19:37:45 +0200 Subject: [PATCH 17/22] Add t-string support to syntax highlighting --- Lib/_pyrepl/_module_completer.py | 9 +++++---- Lib/_pyrepl/mypy.ini | 4 ---- Lib/_pyrepl/utils.py | 12 +++++++++--- Lib/token.py | 6 +++--- Misc/mypy/token.py | 1 + Misc/mypy/typed-stdlib.txt | 1 + Tools/build/generate_token.py | 6 +++--- 7 files changed, 22 insertions(+), 17 deletions(-) create mode 120000 Misc/mypy/token.py diff --git 
a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 1fb043e0b70479..347f05607c75c5 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -2,6 +2,7 @@ import pkgutil import sys +import token import tokenize from io import StringIO from contextlib import contextmanager @@ -180,8 +181,8 @@ class ImportParser: when parsing multiple statements. """ _ignored_tokens = { - tokenize.INDENT, tokenize.DEDENT, tokenize.COMMENT, - tokenize.NL, tokenize.NEWLINE, tokenize.ENDMARKER + token.INDENT, token.DEDENT, token.COMMENT, + token.NL, token.NEWLINE, token.ENDMARKER } _keywords = {'import', 'from', 'as'} @@ -350,11 +351,11 @@ def peek(self) -> TokenInfo | None: def peek_name(self) -> bool: if not (tok := self.peek()): return False - return tok.type == tokenize.NAME + return tok.type == token.NAME def pop_name(self) -> str: tok = self.pop() - if tok.type != tokenize.NAME: + if tok.type != token.NAME: raise ParseError('pop_name') return tok.string diff --git a/Lib/_pyrepl/mypy.ini b/Lib/_pyrepl/mypy.ini index eabd0e9b440bf4..9375a55b53ce8b 100644 --- a/Lib/_pyrepl/mypy.ini +++ b/Lib/_pyrepl/mypy.ini @@ -23,7 +23,3 @@ check_untyped_defs = False # Various internal modules that typeshed deliberately doesn't have stubs for: [mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] ignore_missing_imports = True - -# Other untyped parts of the stdlib -[mypy-idlelib.*] -ignore_missing_imports = True diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 2d921083267b5f..bcde98c28917f2 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -113,8 +113,10 @@ def recover_unterminated_string( if ( msg.startswith("unterminated string literal") or msg.startswith("unterminated f-string literal") + or msg.startswith("unterminated t-string literal") or msg.startswith("EOF in multi-line string") or msg.startswith("unterminated triple-quoted f-string literal") + or msg.startswith("unterminated 
triple-quoted t-string literal") ): start = line_lengths[loc[0] - 1] + loc[1] - 1 end = line_lengths[-1] - 1 @@ -149,7 +151,11 @@ def gen_colors_from_token_stream( continue match token.type: - case T.STRING | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END: + case ( + T.STRING + | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END + | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END + ): span = Span.from_token(token, line_lengths) yield ColorSpan(span, "STRING") case T.COMMENT: @@ -202,7 +208,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: case ( None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="match"), - TI(T.NUMBER | T.STRING | T.FSTRING_START) + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) | TI(T.OP, string="(" | "*" | "-" | "+" | "[" | "{" | "~" | "...") ): return True @@ -217,7 +223,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: case ( None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="case"), - TI(T.NUMBER | T.STRING | T.FSTRING_START) + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) | TI(T.OP, string="(" | "*" | "-" | "[" | "{") ): return True diff --git a/Lib/token.py b/Lib/token.py index a1fde61cd8a070..f61723cc09da02 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -134,11 +134,11 @@ '~': TILDE, } -def ISTERMINAL(x): +def ISTERMINAL(x: int) -> bool: return x < NT_OFFSET -def ISNONTERMINAL(x): +def ISNONTERMINAL(x: int) -> bool: return x >= NT_OFFSET -def ISEOF(x): +def ISEOF(x: int) -> bool: return x == ENDMARKER diff --git a/Misc/mypy/token.py b/Misc/mypy/token.py new file mode 120000 index 00000000000000..0a39f726dda1aa --- /dev/null +++ b/Misc/mypy/token.py @@ -0,0 +1 @@ +../../Lib/token.py \ No newline at end of file diff --git a/Misc/mypy/typed-stdlib.txt b/Misc/mypy/typed-stdlib.txt index 9b27ee0d2de077..07b88ba7445326 100644 --- a/Misc/mypy/typed-stdlib.txt +++ b/Misc/mypy/typed-stdlib.txt @@ -2,4 +2,5 @@ _colorize.py _pyrepl +token.py tomllib \ No newline at end 
of file diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index a64806763f3fce..9ee5ec86e75d47 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -278,13 +278,13 @@ def make_rst(infile, outfile='Doc/library/token-list.inc', %s } -def ISTERMINAL(x): +def ISTERMINAL(x: int) -> bool: return x < NT_OFFSET -def ISNONTERMINAL(x): +def ISNONTERMINAL(x: int) -> bool: return x >= NT_OFFSET -def ISEOF(x): +def ISEOF(x: int) -> bool: return x == ENDMARKER ''' From f835dba6e548d3e94205ee617c611efcc30c40de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 2 May 2025 14:48:19 +0200 Subject: [PATCH 18/22] Apply suggestions from code review Co-authored-by: Victorien <65306057+Viicos@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/_colorize.py | 26 +++++++++++++------------- Lib/_pyrepl/reader.py | 1 - Lib/_pyrepl/utils.py | 17 +++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 8241e642a09b3d..45f1d2ca2b2aa9 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -9,18 +9,18 @@ if False: from typing import IO, Literal - type ColorTag = ( - Literal["PROMPT"] - | Literal["KEYWORD"] - | Literal["BUILTIN"] - | Literal["COMMENT"] - | Literal["STRING"] - | Literal["NUMBER"] - | Literal["OP"] - | Literal["DEFINITION"] - | Literal["SOFT_KEYWORD"] - | Literal["RESET"] - ) + type ColorTag = Literal[ + "PROMPT", + "KEYWORD", + "BUILTIN", + "COMMENT", + "STRING", + "NUMBER", + "OP", + "DEFINITION", + "SOFT_KEYWORD", + "RESET", + ] theme: dict[ColorTag, str] @@ -143,7 +143,7 @@ def set_theme(t: dict[ColorTag, str] | None = None) -> None: "STRING": colors.GREEN, "NUMBER": colors.YELLOW, "OP": colors.RESET, - "DEFINITION": colors.BOLD_WHITE, + "DEFINITION": colors.BOLD, "SOFT_KEYWORD": colors.BOLD_BLUE, "RESET": colors.RESET, } diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py 
index 52504e17446dc5..65c2230dfd65f7 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -345,7 +345,6 @@ def calc_screen(self) -> list[str]: prompt, prompt_len = self.process_prompt(prompt) chars, char_widths = disp_str(line, colors, offset) wrapcount = (sum(char_widths) + prompt_len) // self.console.width - # trace("wrapcount = {wrapcount}", wrapcount=wrapcount) if wrapcount == 0 or not char_widths: offset += line_len + 1 # Takes all of the line plus the newline last_refresh_line_end_offsets.append(offset) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index bcde98c28917f2..4123ebc3dacda3 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -110,13 +110,15 @@ def recover_unterminated_string( buffer: str, ) -> Iterator[ColorSpan]: msg, loc = exc.args - if ( - msg.startswith("unterminated string literal") - or msg.startswith("unterminated f-string literal") - or msg.startswith("unterminated t-string literal") - or msg.startswith("EOF in multi-line string") - or msg.startswith("unterminated triple-quoted f-string literal") - or msg.startswith("unterminated triple-quoted t-string literal") + if msg.startswith( + ( + "unterminated string literal", + "unterminated f-string literal", + "unterminated t-string literal", + "EOF in multi-line string", + "unterminated triple-quoted f-string literal", + "unterminated triple-quoted t-string literal", + ) ): start = line_lengths[loc[0] - 1] + loc[1] - 1 end = line_lengths[-1] - 1 @@ -320,7 +322,6 @@ def disp_str( # the next call to `disp_str()` will revive it. 
chars[-1] += _colorize.theme["RESET"] - # trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths From 9003d05eb17ff28da2ffacba01c7479a8fe0fef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 2 May 2025 14:53:31 +0200 Subject: [PATCH 19/22] Add _colorize.ANSIColors.BOLD --- Lib/_colorize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 45f1d2ca2b2aa9..d2d879b659c8f0 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -37,6 +37,7 @@ class ANSIColors: WHITE = "\x1b[37m" # more like LIGHT GRAY YELLOW = "\x1b[33m" + BOLD = "\x1b[1m" BOLD_BLACK = "\x1b[1;30m" # DARK GRAY BOLD_BLUE = "\x1b[1;34m" BOLD_CYAN = "\x1b[1;36m" From ff1f92bf10c2341b9abfa16071bf42803aac16e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 2 May 2025 15:45:34 +0200 Subject: [PATCH 20/22] Remove - and + from first sets matching for `match` --- Lib/_pyrepl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 4123ebc3dacda3..0fdd40c9fc455f 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -211,7 +211,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="match"), TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) - | TI(T.OP, string="(" | "*" | "-" | "+" | "[" | "{" | "~" | "...") + | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") ): return True case ( From bd84cd89f64614804fb38f10e236fa9823a1fdd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 2 May 2025 19:25:39 +0200 Subject: [PATCH 21/22] Address Pablo's review --- Lib/_pyrepl/commands.py | 3 +-- Lib/_pyrepl/utils.py | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 4d80e503502f3f..2054a8e400fdf6 100644 --- 
a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -21,6 +21,7 @@ from __future__ import annotations import os +import time # Categories of actions: # killing @@ -481,9 +482,7 @@ class perform_bracketed_paste(Command): def do(self) -> None: done = "\x1b[201~" data = "" - import time start = time.time() - trace("bracketed pasting starts") while done not in data: self.reader.console.wait(100) ev = self.reader.console.getpending() diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 0fdd40c9fc455f..81de50cf12262e 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -110,6 +110,11 @@ def recover_unterminated_string( buffer: str, ) -> Iterator[ColorSpan]: msg, loc = exc.args + if loc is None: + return + + line_no, column = loc + if msg.startswith( ( "unterminated string literal", @@ -120,7 +125,7 @@ def recover_unterminated_string( "unterminated triple-quoted t-string literal", ) ): - start = line_lengths[loc[0] - 1] + loc[1] - 1 + start = line_lengths[line_no - 1] + column - 1 end = line_lengths[-1] - 1 # in case FSTRING_START was already emitted From 080f3006cb01b06821a9df95f3eb0b6d7127e5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 2 May 2025 19:41:55 +0200 Subject: [PATCH 22/22] Exclude applying soft keywords inside any bracket pairs, not just parentheses --- Lib/_pyrepl/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 81de50cf12262e..fe154aa59a00fe 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -151,7 +151,7 @@ def gen_colors_from_token_stream( token_window = prev_next_window(token_generator) is_def_name = False - paren_level = 0 + bracket_level = 0 for prev_token, token, next_token in token_window: assert token is not None if token.start == token.end: @@ -172,10 +172,10 @@ def gen_colors_from_token_stream( span = Span.from_token(token, line_lengths) yield ColorSpan(span, "NUMBER") case T.OP: - if 
token.string == "(": - paren_level += 1 - elif token.string == ")": - paren_level -= 1 + if token.string in "([{": + bracket_level += 1 + elif token.string in ")]}": + bracket_level -= 1 span = Span.from_token(token, line_lengths) yield ColorSpan(span, "OP") case T.NAME: @@ -190,7 +190,7 @@ def gen_colors_from_token_stream( is_def_name = True elif ( keyword.issoftkeyword(token.string) - and paren_level == 0 + and bracket_level == 0 and is_soft_keyword_used(prev_token, token, next_token) ): span = Span.from_token(token, line_lengths)