@@ -68,12 +68,15 @@ class Pattern:
6868 # master pattern object. keeps track of global attributes
6969 def __init__ (self ):
7070 self .flags = 0
71- self .open = []
72- self .groups = 1
7371 self .groupdict = {}
72+ self .subpatterns = [None ] # group 0
73+ self .lookbehindgroups = None
74+ @property
75+ def groups (self ):
76+ return len (self .subpatterns )
7477 def opengroup (self , name = None ):
7578 gid = self .groups
76- self .groups = gid + 1
79+ self .subpatterns . append ( None )
7780 if self .groups > MAXGROUPS :
7881 raise error ("groups number is too large" )
7982 if name is not None :
@@ -82,12 +85,19 @@ def opengroup(self, name=None):
8285 raise error ("redefinition of group name %r as group %d; "
8386 "was group %d" % (name , gid , ogid ))
8487 self .groupdict [name ] = gid
85- self .open .append (gid )
8688 return gid
87- def closegroup (self , gid ):
88- self .open . remove ( gid )
89+ def closegroup (self , gid , p ):
90+ self .subpatterns [ gid ] = p
8991 def checkgroup (self , gid ):
90- return gid < self .groups and gid not in self .open
92+ return gid < self .groups and self .subpatterns [gid ] is not None
93+
94+ def checklookbehindgroup (self , gid , source ):
95+ if self .lookbehindgroups is not None :
96+ if not self .checkgroup (gid ):
97+ raise source .error ('cannot refer to an open group' )
98+ if gid >= self .lookbehindgroups :
99+ raise source .error ('cannot refer to group defined in the same '
100+ 'lookbehind subpattern' )
91101
92102class SubPattern :
93103 # a subpattern, in intermediate form
@@ -183,7 +193,21 @@ def getwidth(self):
183193 elif op in _UNITCODES :
184194 lo = lo + 1
185195 hi = hi + 1
186- elif op == SUCCESS :
196+ elif op is GROUPREF :
197+ i , j = self .pattern .subpatterns [av ].getwidth ()
198+ lo = lo + i
199+ hi = hi + j
200+ elif op is GROUPREF_EXISTS :
201+ i , j = av [1 ].getwidth ()
202+ if av [2 ] is not None :
203+ l , h = av [2 ].getwidth ()
204+ i = min (i , l )
205+ j = max (j , h )
206+ else :
207+ i = 0
208+ lo = lo + i
209+ hi = hi + j
210+ elif op is SUCCESS :
187211 break
188212 self .width = min (lo , MAXREPEAT - 1 ), min (hi , MAXREPEAT )
189213 return self .width
@@ -379,6 +403,7 @@ def _escape(source, escape, state):
379403 if not state .checkgroup (group ):
380404 raise source .error ("cannot refer to open group" ,
381405 len (escape ))
406+ state .checklookbehindgroup (group , source )
382407 return GROUPREF , group
383408 raise ValueError
384409 if len (escape ) == 2 :
@@ -641,6 +666,7 @@ def _parse(source, state):
641666 if gid is None :
642667 msg = "unknown group name: {0!r}" .format (name )
643668 raise source .error (msg , len (name ) + 1 )
669+ state .checklookbehindgroup (gid , source )
644670 subpatternappend ((GROUPREF , gid ))
645671 continue
646672 else :
@@ -668,7 +694,13 @@ def _parse(source, state):
668694 if char is None or char not in "=!" :
669695 raise source .error ("syntax error" )
670696 dir = - 1 # lookbehind
697+ lookbehindgroups = state .lookbehindgroups
698+ if lookbehindgroups is None :
699+ state .lookbehindgroups = state .groups
671700 p = _parse_sub (source , state )
701+ if dir < 0 :
702+ if lookbehindgroups is None :
703+ state .lookbehindgroups = None
672704 if not sourcematch (")" ):
673705 raise source .error ("unbalanced parenthesis" )
674706 if char == "=" :
@@ -701,6 +733,7 @@ def _parse(source, state):
701733 if condgroup >= MAXGROUPS :
702734 raise source .error ("the group number is too large" ,
703735 len (condname ) + 1 )
736+ state .checklookbehindgroup (condgroup , source )
704737 elif char in FLAGS :
705738 # flags
706739 state .flags |= FLAGS [char ]
@@ -726,7 +759,7 @@ def _parse(source, state):
726759 if not sourcematch (")" ):
727760 raise source .error ("unbalanced parenthesis" )
728761 if group is not None :
729- state .closegroup (group )
762+ state .closegroup (group , p )
730763 subpatternappend ((SUBPATTERN , (group , p )))
731764 else :
732765 while True :
0 commit comments