1+ import os
12import re
23
34
# Path separators recognised on this platform: ``os.sep`` plus ``os.altsep``
# when one is defined (``str(None) * False`` collapses to the empty string).
_default_seps = os.sep + str(os.altsep) * bool(os.altsep)


class Translator:
    """
    Translate glob patterns into regular expressions.

    ``seps`` is the set of characters treated as path separators; it must be
    a non-empty subset of the platform's separators.

    >>> Translator('xyz')
    Traceback (most recent call last):
    ...
    AssertionError: Invalid separators

    >>> Translator('')
    Traceback (most recent call last):
    ...
    AssertionError: Invalid separators
    """

    seps: str

    def __init__(self, seps: str = _default_seps):
        # Raise explicitly instead of using ``assert`` so the validation is
        # not stripped when Python runs with -O; the exception type and
        # message are unchanged.
        if not seps or not set(seps) <= set(_default_seps):
            raise AssertionError("Invalid separators")
        self.seps = seps

    def translate(self, pattern: str) -> str:
        """
        Given a glob pattern, produce a regex that matches it.

        Raises ValueError if ``**`` does not stand alone in a path segment.
        """
        return self.extend(self.translate_core(pattern))

    def extend(self, pattern: str) -> str:
        r"""
        Extend regex for pattern-wide concerns.

        Apply '(?s:)' to create a non-capturing group in which '.'
        also matches newlines (valid in file names on Unix).

        Append '\Z' to imply fullmatch even when match is used.
        """
        return rf'(?s:{pattern})\Z'

    def translate_core(self, pattern: str) -> str:
        r"""
        Given a glob pattern, produce a regex that matches it.

        The result is not anchored; see :meth:`extend`.

        >>> t = Translator()
        >>> t.translate_core('*.txt').replace('\\\\', '')
        '[^/]*\\.txt'
        >>> t.translate_core('a?txt').replace('\\\\', '')
        'a[^/]txt'
        >>> t.translate_core('**/*').replace('\\\\', '')
        '.*/[^/][^/]*'
        """
        self.restrict_rglob(pattern)
        return ''.join(map(self.replace, separate(self.star_not_empty(pattern))))

    def replace(self, match: "re.Match[str]") -> str:
        """
        Perform the replacements for a match from :func:`separate`.

        A bracketed set (the ``set`` group) is passed through verbatim.
        Any other text is regex-escaped and then its wildcards are expanded:
        ``**`` matches anything, while ``*`` and ``?`` match any run of, or
        exactly one, non-separator character respectively.
        """
        # Both single-segment wildcards must exclude every configured
        # separator, not just '/'.
        not_sep = rf'[^{re.escape(self.seps)}]'
        return match.group('set') or (
            re.escape(match.group(0))
            # Order matters: consume '**' before the single '*'.
            .replace('\\*\\*', r'.*')
            .replace('\\*', not_sep + '*')
            .replace('\\?', not_sep)
        )

    def restrict_rglob(self, pattern: str) -> None:
        """
        Raise ValueError if ** appears in anything but a full path segment.

        >>> Translator().translate('**foo')
        Traceback (most recent call last):
        ...
        ValueError: ** must appear alone in a path segment
        """
        seps_pattern = rf'[{re.escape(self.seps)}]+'
        segments = re.split(seps_pattern, pattern)
        if any('**' in segment and segment != '**' for segment in segments):
            raise ValueError("** must appear alone in a path segment")

    def star_not_empty(self, pattern: str) -> str:
        """
        Ensure that * will not match an empty segment.

        A segment consisting solely of ``*`` is rewritten to ``?*`` so that
        it requires at least one character; other segments are unchanged.
        """

        def handle_segment(match):
            segment = match.group(0)
            return '?*' if segment == '*' else segment

        not_seps_pattern = rf'[^{re.escape(self.seps)}]+'
        return re.sub(not_seps_pattern, handle_segment, pattern)
1695
1796
1897def separate (pattern ):
@@ -25,16 +104,3 @@ def separate(pattern):
25104 ['a', '[?]', 'txt']
26105 """
27106 return re .finditer (r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)' , pattern )
28-
29-
30- def replace (match ):
31- """
32- Perform the replacements for a match from :func:`separate`.
33- """
34-
35- return match .group ('set' ) or (
36- re .escape (match .group (0 ))
37- .replace ('\\ *\\ *' , r'.*' )
38- .replace ('\\ *' , r'[^/]*' )
39- .replace ('\\ ?' , r'.' )
40- )
0 commit comments