6565 "u" : SRE_FLAG_UNICODE ,
6666}
6767
# Flags that apply to the pattern as a whole.  The flag parser rejects any
# attempt to turn one of these on or off inside a scoped
# "(?flags-flags:...)" group.
GLOBAL_FLAGS = (
    SRE_FLAG_ASCII
    | SRE_FLAG_LOCALE
    | SRE_FLAG_UNICODE
    | SRE_FLAG_DEBUG
    | SRE_FLAG_TEMPLATE
)
70+
class Verbose(Exception):
    """Signal that a global inline flag group switched on VERBOSE mid-parse.

    Raised by the inline-flag parser when a "(?x)"-style group adds
    SRE_FLAG_VERBOSE while the pattern state does not have it set yet;
    parse() catches it and parses the pattern again with the flag enabled.
    """
73+
6874class Pattern :
6975 # master pattern object. keeps track of global attributes
7076 def __init__ (self ):
@@ -184,7 +190,7 @@ def getwidth(self):
184190 lo = lo + i
185191 hi = hi + j
186192 elif op is SUBPATTERN :
187- i , j = av [1 ].getwidth ()
193+ i , j = av [- 1 ].getwidth ()
188194 lo = lo + i
189195 hi = hi + j
190196 elif op in _REPEATCODES :
@@ -395,15 +401,15 @@ def _escape(source, escape, state):
395401 pass
396402 raise source .error ("bad escape %s" % escape , len (escape ))
397403
398- def _parse_sub (source , state , nested = True ):
404+ def _parse_sub (source , state , verbose , nested = True ):
399405 # parse an alternation: a|b|c
400406
401407 items = []
402408 itemsappend = items .append
403409 sourcematch = source .match
404410 start = source .tell ()
405411 while True :
406- itemsappend (_parse (source , state ))
412+ itemsappend (_parse (source , state , verbose ))
407413 if not sourcematch ("|" ):
408414 break
409415
@@ -445,10 +451,10 @@ def _parse_sub(source, state, nested=True):
445451 subpattern .append ((BRANCH , (None , items )))
446452 return subpattern
447453
448- def _parse_sub_cond (source , state , condgroup ):
449- item_yes = _parse (source , state )
454+ def _parse_sub_cond (source , state , condgroup , verbose ):
455+ item_yes = _parse (source , state , verbose )
450456 if source .match ("|" ):
451- item_no = _parse (source , state )
457+ item_no = _parse (source , state , verbose )
452458 if source .next == "|" :
453459 raise source .error ("conditional backref with more than two branches" )
454460 else :
@@ -457,7 +463,7 @@ def _parse_sub_cond(source, state, condgroup):
457463 subpattern .append ((GROUPREF_EXISTS , (condgroup , item_yes , item_no )))
458464 return subpattern
459465
460- def _parse (source , state ):
466+ def _parse (source , state , verbose ):
461467 # parse a simple pattern
462468 subpattern = SubPattern (state )
463469
@@ -467,7 +473,6 @@ def _parse(source, state):
467473 sourcematch = source .match
468474 _len = len
469475 _ord = ord
470- verbose = state .flags & SRE_FLAG_VERBOSE
471476
472477 while True :
473478
@@ -621,6 +626,8 @@ def _parse(source, state):
621626 group = True
622627 name = None
623628 condgroup = None
629+ add_flags = 0
630+ del_flags = 0
624631 if sourcematch ("?" ):
625632 # options
626633 char = sourceget ()
@@ -682,7 +689,7 @@ def _parse(source, state):
682689 lookbehindgroups = state .lookbehindgroups
683690 if lookbehindgroups is None :
684691 state .lookbehindgroups = state .groups
685- p = _parse_sub (source , state )
692+ p = _parse_sub (source , state , verbose )
686693 if dir < 0 :
687694 if lookbehindgroups is None :
688695 state .lookbehindgroups = None
@@ -718,19 +725,13 @@ def _parse(source, state):
718725 raise source .error ("invalid group reference" ,
719726 len (condname ) + 1 )
720727 state .checklookbehindgroup (condgroup , source )
721- elif char in FLAGS :
728+ elif char in FLAGS or char == "-" :
722729 # flags
723- while True :
724- state .flags |= FLAGS [char ]
725- char = sourceget ()
726- if char is None :
727- raise source .error ("missing )" )
728- if char == ")" :
729- break
730- if char not in FLAGS :
731- raise source .error ("unknown flag" , len (char ))
732- verbose = state .flags & SRE_FLAG_VERBOSE
733- continue
730+ flags = _parse_flags (source , state , char )
731+ if flags is None : # global flags
732+ continue
733+ add_flags , del_flags = flags
734+ group = None
734735 else :
735736 raise source .error ("unknown extension ?" + char ,
736737 len (char ) + 1 )
@@ -742,15 +743,17 @@ def _parse(source, state):
742743 except error as err :
743744 raise source .error (err .msg , len (name ) + 1 ) from None
744745 if condgroup :
745- p = _parse_sub_cond (source , state , condgroup )
746+ p = _parse_sub_cond (source , state , condgroup , verbose )
746747 else :
747- p = _parse_sub (source , state )
748+ sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE )) and
749+ not (del_flags & SRE_FLAG_VERBOSE ))
750+ p = _parse_sub (source , state , sub_verbose )
748751 if not source .match (")" ):
749752 raise source .error ("missing ), unterminated subpattern" ,
750753 source .tell () - start )
751754 if group is not None :
752755 state .closegroup (group , p )
753- subpatternappend ((SUBPATTERN , (group , p )))
756+ subpatternappend ((SUBPATTERN , (group , add_flags , del_flags , p )))
754757
755758 elif this == "^" :
756759 subpatternappend ((AT , AT_BEGINNING ))
@@ -763,6 +766,53 @@ def _parse(source, state):
763766
764767 return subpattern
765768
def _parse_flags(source, state, char):
    """Parse the flag letters that follow "(?" in an inline-flag group.

    *char* is the first character after "(?": either a key of FLAGS or "-".
    Further characters are read with ``source.get()``.

    Returns None for the global form "(?flags)", after OR-ing the flags
    into ``state.flags``.  For the scoped form "(?flags-flags:" it returns
    the pair ``(add_flags, del_flags)``; the terminating ":" has been
    consumed by then.

    Raises Verbose when a global group adds SRE_FLAG_VERBOSE that
    ``state.flags`` does not contain yet, and ``source.error(...)`` for
    malformed flag syntax, for global flags used in the scoped form, and
    for a flag that is both turned on and turned off.
    """
    next_char = source.get

    def unexpected(ch, expected):
        # A letter that is not a known flag is reported as such; any other
        # character means the expected delimiter/flag is missing.
        msg = "unknown flag" if ch.isalpha() else expected
        return source.error(msg, len(ch))

    add_flags = del_flags = 0

    if char != "-":
        # Flags to switch on: keep reading until ")", "-" or ":".
        add_flags |= FLAGS[char]
        char = next_char()
        while char is not None and char not in ")-:":
            if char not in FLAGS:
                raise unexpected(char, "missing -, : or )")
            add_flags |= FLAGS[char]
            char = next_char()
        if char is None:
            raise source.error("missing -, : or )")

    if char == ")":
        # Global form "(?flags)": the flags go straight into the state.
        newly_verbose = ((add_flags & SRE_FLAG_VERBOSE) and
                         not (state.flags & SRE_FLAG_VERBOSE))
        if newly_verbose:
            # Let the caller restart with VERBOSE enabled.
            raise Verbose
        state.flags |= add_flags
        return None

    # From here on this is the scoped form, which ends with ":".
    if add_flags & GLOBAL_FLAGS:
        raise source.error("bad inline flags: cannot turn on global flag", 1)

    if char == "-":
        # Flags to switch off: at least one is required after "-".
        char = next_char()
        if char is None:
            raise source.error("missing flag")
        if char not in FLAGS:
            raise unexpected(char, "missing flag")
        del_flags |= FLAGS[char]
        char = next_char()
        while char is not None and char != ":":
            if char not in FLAGS:
                raise unexpected(char, "missing :")
            del_flags |= FLAGS[char]
            char = next_char()
        if char is None:
            raise source.error("missing :")

    assert char == ":"
    if del_flags & GLOBAL_FLAGS:
        raise source.error("bad inline flags: cannot turn off global flag", 1)
    if add_flags & del_flags:
        raise source.error("bad inline flags: flag turned on and off", 1)
    return add_flags, del_flags
815+
766816def fix_flags (src , flags ):
767817 # Check and fix flags according to the type of pattern (str or bytes)
768818 if isinstance (src , str ):
@@ -789,18 +839,22 @@ def parse(str, flags=0, pattern=None):
789839 pattern .flags = flags
790840 pattern .str = str
791841
792- p = _parse_sub (source , pattern , 0 )
842+ try :
843+ p = _parse_sub (source , pattern , flags & SRE_FLAG_VERBOSE , False )
844+ except Verbose :
845+ # the VERBOSE flag was switched on inside the pattern. to be
846+ # on the safe side, we'll parse the whole thing again...
847+ pattern = Pattern ()
848+ pattern .flags = flags | SRE_FLAG_VERBOSE
849+ pattern .str = str
850+ p = _parse_sub (source , pattern , True , False )
851+
793852 p .pattern .flags = fix_flags (str , p .pattern .flags )
794853
795854 if source .next is not None :
796855 assert source .next == ")"
797856 raise source .error ("unbalanced parenthesis" )
798857
799- if not (flags & SRE_FLAG_VERBOSE ) and p .pattern .flags & SRE_FLAG_VERBOSE :
800- # the VERBOSE flag was switched on inside the pattern. to be
801- # on the safe side, we'll parse the whole thing again...
802- return parse (str , p .pattern .flags )
803-
804858 if flags & SRE_FLAG_DEBUG :
805859 p .dump ()
806860
0 commit comments