@@ -409,57 +409,39 @@ def _generate_overlap_table(prefix):
409409 table [i ] = idx + 1
410410 return table
411411
412- def _compile_info (code , pattern , flags ):
413- # internal: compile an info block. in the current version,
414- # this contains min/max pattern width, and an optional literal
415- # prefix or a character map
416- lo , hi = pattern .getwidth ()
417- if hi > MAXCODE :
418- hi = MAXCODE
419- if lo == 0 :
420- code .extend ([INFO , 4 , 0 , lo , hi ])
421- return
422- # look for a literal prefix
def _get_literal_prefix(pattern):
    """Collect the literal prefix at the start of *pattern*.

    Walks ``pattern.data`` in order, gathering the argument of every
    LITERAL item and recursing into the second element of every
    SUBPATTERN item.  The walk ends at the first item of any other
    kind, or as soon as a nested subpattern reports that it was not
    consumed completely.

    Returns a ``(prefix, prefix_skip, got_all)`` tuple:

    * ``prefix`` -- list of the literal arguments gathered so far.
    * ``prefix_skip`` -- ``None`` if no SUBPATTERN fixed it; otherwise
      the length of ``prefix`` at the first SUBPATTERN whose first
      element is not ``None`` (presumably a group number -- confirm
      against the parser), or that length plus the skip value reported
      by a nested subpattern.
    * ``got_all`` -- ``True`` only when every item of the pattern (and
      of every nested subpattern) was consumed.
    """
    literals = []
    skip = None
    for op, av in pattern.data:
        if op is LITERAL:
            literals.append(av)
            continue
        if op is not SUBPATTERN:
            # anything else ends the literal run
            return literals, skip, False
        inner_literals, inner_skip, inner_complete = _get_literal_prefix(av[1])
        if skip is None:
            # only the first subpattern that yields a skip value counts;
            # the offset is taken before the nested literals are added
            offset = len(literals)
            if av[0] is not None:
                skip = offset
            elif inner_skip is not None:
                skip = offset + inner_skip
        literals.extend(inner_literals)
        if not inner_complete:
            # the nested subpattern stopped early, so we stop too
            return literals, skip, False
    return literals, skip, True
435+
436+ def _get_charset_prefix (pattern ):
426437 charset = [] # not used
427438 charsetappend = charset .append
428- if not (flags & SRE_FLAG_IGNORECASE ):
429- # look for literal prefix
430- for op , av in pattern .data :
439+ if pattern .data :
440+ op , av = pattern .data [0 ]
441+ if op is SUBPATTERN and av [1 ]:
442+ op , av = av [1 ][0 ]
431443 if op is LITERAL :
432- if len (prefix ) == prefix_skip :
433- prefix_skip = prefix_skip + 1
434- prefixappend (av )
435- elif op is SUBPATTERN and len (av [1 ]) == 1 :
436- op , av = av [1 ][0 ]
437- if op is LITERAL :
438- prefixappend (av )
439- else :
440- break
441- else :
442- break
443- # if no prefix, look for charset prefix
444- if not prefix and pattern .data :
445- op , av = pattern .data [0 ]
446- if op is SUBPATTERN and av [1 ]:
447- op , av = av [1 ][0 ]
448- if op is LITERAL :
449- charsetappend ((op , av ))
450- elif op is BRANCH :
451- c = []
452- cappend = c .append
453- for p in av [1 ]:
454- if not p :
455- break
456- op , av = p [0 ]
457- if op is LITERAL :
458- cappend ((op , av ))
459- else :
460- break
461- else :
462- charset = c
444+ charsetappend ((op , av ))
463445 elif op is BRANCH :
464446 c = []
465447 cappend = c .append
@@ -473,8 +455,43 @@ def _compile_info(code, pattern, flags):
473455 break
474456 else :
475457 charset = c
476- elif op is IN :
477- charset = av
458+ elif op is BRANCH :
459+ c = []
460+ cappend = c .append
461+ for p in av [1 ]:
462+ if not p :
463+ break
464+ op , av = p [0 ]
465+ if op is LITERAL :
466+ cappend ((op , av ))
467+ else :
468+ break
469+ else :
470+ charset = c
471+ elif op is IN :
472+ charset = av
473+ return charset
474+
475+ def _compile_info (code , pattern , flags ):
476+ # internal: compile an info block. in the current version,
477+ # this contains min/max pattern width, and an optional literal
478+ # prefix or a character map
479+ lo , hi = pattern .getwidth ()
480+ if hi > MAXCODE :
481+ hi = MAXCODE
482+ if lo == 0 :
483+ code .extend ([INFO , 4 , 0 , lo , hi ])
484+ return
485+ # look for a literal prefix
486+ prefix = []
487+ prefix_skip = 0
488+ charset = [] # not used
489+ if not (flags & SRE_FLAG_IGNORECASE ):
490+ # look for literal prefix
491+ prefix , prefix_skip , got_all = _get_literal_prefix (pattern )
492+ # if no prefix, look for charset prefix
493+ if not prefix :
494+ charset = _get_charset_prefix (pattern )
478495## if prefix:
479496## print("*** PREFIX", prefix, prefix_skip)
480497## if charset:
@@ -487,7 +504,7 @@ def _compile_info(code, pattern, flags):
487504 mask = 0
488505 if prefix :
489506 mask = SRE_INFO_PREFIX
490- if len ( prefix ) == prefix_skip == len ( pattern . data ) :
507+ if prefix_skip is None and got_all :
491508 mask = mask | SRE_INFO_LITERAL
492509 elif charset :
493510 mask = mask | SRE_INFO_CHARSET
@@ -502,6 +519,8 @@ def _compile_info(code, pattern, flags):
502519 # add literal prefix
503520 if prefix :
504521 emit (len (prefix )) # length
522+ if prefix_skip is None :
523+ prefix_skip = len (prefix )
505524 emit (prefix_skip ) # skip
506525 code .extend (prefix )
507526 # generate overlap table
0 commit comments