Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 030cff2

Browse files
committed
lint and refactor and reduce, rename
1 parent 6560e86 commit 030cff2

5 files changed

Lines changed: 21 additions & 33 deletions

File tree

docs/intro.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ languages.
444444
History
445445
=======
446446

447-
*next version*
447+
0.4.0 *2026-01-25*
448448
* **New** Functions `iter_graphemes_reverse()`_, `grapheme_boundary_before()`_.
449449
* **Bugfix** OSC Hyperlinks should not be broken by ``wrap()``
450450

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
44

55
[project]
66
name = "wcwidth"
7-
version = "0.3.5"
7+
version = "0.4.0"
88
description = "Measures the displayed width of unicode strings in a terminal"
99
readme = "README.rst"
1010
keywords = [

tests/test_grapheme.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ def test_unicode_grapheme_break_test(input_str, expected):
170170
(PREPEND_CHAR + 'a', 2, 0), # whole cluster
171171
# Prepend + Control: control breaks (GB4)
172172
(PREPEND_CHAR + '\n', 2, 1), # '\n' separate at 1
173+
# C1 control (NEL, 0x85) stops backward scan in _find_cluster_start (GB4)
174+
('X\x85\u0301', 3, 2),
173175
])
174176
def test_grapheme_boundary_before_basic(text, pos, expected):
175177
"""Basic grapheme_boundary_before tests."""

wcwidth/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@
4040
# We also used pkg_resources to load unicode version tables from version.json,
4141
# generated by bin/update-tables.py, but some environments are unable to
4242
# import pkg_resources for one reason or another, yikes!
43-
__version__ = '0.3.5'
43+
__version__ = '0.4.0'

wcwidth/grapheme.py

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
# std imports
3737
from collections.abc import Iterator
3838

39+
# Maximum backward scan distance when finding grapheme cluster boundaries.
40+
# Covers all known Unicode grapheme clusters with margin; longer sequences are pathological.
41+
MAX_GRAPHEME_SCAN = 32
42+
3943

4044
class GCB(IntEnum):
4145
"""Grapheme Cluster Break property values."""
@@ -317,51 +321,42 @@ def _find_cluster_start(text: str, pos: int) -> int:
317321
:param pos: Position to search before (exclusive).
318322
:returns: Start position of the grapheme cluster.
319323
"""
320-
# We're finding the cluster containing text[pos-1]
321-
target_char = text[pos - 1]
322-
target_cp = ord(target_char)
324+
target_cp = ord(text[pos - 1])
323325

324326
# GB3: CR x LF - LF after CR is part of same cluster
325-
if target_char == '\n' and pos >= 2 and text[pos - 2] == '\r':
327+
if target_cp == 0x0A and pos >= 2 and text[pos - 2] == '\r':
326328
return pos - 2
327329

328330
# Fast path: ASCII (except LF) starts its own cluster
329331
if target_cp < 0x80:
330332
# GB9b: Check for preceding PREPEND (rare: Arabic/Brahmic)
331333
if pos >= 2 and target_cp >= 0x20:
332-
preceding_cp = ord(text[pos - 2])
333-
if preceding_cp >= 0x80 and _grapheme_cluster_break(preceding_cp) == GCB.PREPEND:
334+
prev_cp = ord(text[pos - 2])
335+
if prev_cp >= 0x80 and _grapheme_cluster_break(prev_cp) == GCB.PREPEND:
334336
return _find_cluster_start(text, pos - 1)
335337
return pos - 1
336338

337-
# Phase 1: Scan backward to find a safe starting point
339+
# Scan backward to find a safe starting point
338340
safe_start = pos - 1
339-
max_scan = 32 # Bounded by max grapheme cluster complexity
340-
341-
while safe_start > 0 and (pos - safe_start) < max_scan:
341+
while safe_start > 0 and (pos - safe_start) < MAX_GRAPHEME_SCAN:
342342
cp = ord(text[safe_start])
343343
if 0x20 <= cp < 0x80: # ASCII always starts a cluster
344344
break
345-
if _grapheme_cluster_break(cp) == GCB.CONTROL: # Control breaks after (GB4)
345+
if _grapheme_cluster_break(cp) == GCB.CONTROL: # GB4
346346
break
347347
safe_start -= 1
348348

349-
# Phase 2: Verify forward to find the actual cluster boundary
349+
# Verify forward to find the actual cluster boundary
350350
cluster_start = safe_start
351-
ri_count = 0
352-
353351
left_gcb = _grapheme_cluster_break(ord(text[safe_start]))
354-
if left_gcb == GCB.REGIONAL_INDICATOR:
355-
ri_count = 1
352+
ri_count = 1 if left_gcb == GCB.REGIONAL_INDICATOR else 0
356353

357354
for i in range(safe_start + 1, pos):
358355
right_gcb = _grapheme_cluster_break(ord(text[i]))
359356
result = _should_break(left_gcb, right_gcb, text, i, ri_count)
360357
ri_count = result.ri_count
361-
362358
if result.should_break:
363359
cluster_start = i
364-
365360
left_gcb = right_gcb
366361

367362
return cluster_start
@@ -386,10 +381,7 @@ def grapheme_boundary_before(unistr: str, pos: int) -> int:
386381
"""
387382
if pos <= 0:
388383
return 0
389-
if pos > len(unistr):
390-
pos = len(unistr)
391-
392-
return _find_cluster_start(unistr, pos)
384+
return _find_cluster_start(unistr, min(pos, len(unistr)))
393385

394386

395387
def iter_graphemes_reverse(
@@ -417,14 +409,8 @@ def iter_graphemes_reverse(
417409

418410
length = len(unistr)
419411

420-
if end is None:
421-
end = length
422-
else:
423-
end = min(end, length)
424-
425-
# Clamp start to valid range
426-
if start < 0:
427-
start = 0
412+
end = length if end is None else min(end, length)
413+
start = max(start, 0)
428414

429415
if start >= end or start >= length:
430416
return

0 commit comments

Comments
 (0)