@@ -449,9 +449,7 @@ def toEncoding(self, s, encoding=None):
449449 s = self .toEncoding (str (s ), encoding or "utf8" )
450450 return s
451451
452- BARE_AMPERSAND_OR_BRACKET = re .compile ("([<>]|"
453- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
454- + ")" )
452+ BARE_AMPERSAND_OR_BRACKET = re .compile (r"([<>]|&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;))" )
455453
456454 def _sub_entity (self , x ):
457455 """Used with a regular expression to substitute the
@@ -574,7 +572,7 @@ def __init__(self, parser, name, attrs=None, parent=None,
574572 # Convert any HTML, XML, or numeric entities in the attribute values.
575573 # Reference: https://github.com/pkrumins/xgoogle/pull/16/commits/3dba1165c436b0d6e5bdbd09e53ca0dbf8a043f8
576574 convert = lambda k_val : (k_val [0 ],
577- re .sub ("&(#\d+|#x[0-9a-fA-F]+|\w+);" ,
575+ re .sub (r"&(#\d+|#x[0-9a-fA-F]+|\w+);" ,
578576 self ._convertEntities ,
579577 k_val [1 ]))
580578 self .attrs = map (convert , self .attrs )
@@ -1079,9 +1077,9 @@ class BeautifulStoneSoup(Tag, sgmllib.SGMLParser):
10791077 QUOTE_TAGS = {}
10801078 PRESERVE_WHITESPACE_TAGS = []
10811079
1082- MARKUP_MASSAGE = [(re .compile ('(<[^<>]*)/>' ),
1080+ MARKUP_MASSAGE = [(re .compile (r'(<[^<>]*)/>' ),
10831081 lambda x : x .group (1 ) + ' />' ),
1084- (re .compile ('<!\s+([^<>]*)>' ),
1082+ (re .compile (r'<!\s+([^<>]*)>' ),
10851083 lambda x : '<!' + x .group (1 ) + '>' )
10861084 ]
10871085
@@ -1590,7 +1588,7 @@ def __init__(self, *args, **kwargs):
15901588 NESTABLE_LIST_TAGS , NESTABLE_TABLE_TAGS )
15911589
15921590 # Used to detect the charset in a META tag; see start_meta
1593- CHARSET_RE = re .compile ("((^|;)\s*charset=)([^;]*)" , re .M )
1591+ CHARSET_RE = re .compile (r"((^|;)\s*charset=)([^;]*)" , re .M )
15941592
15951593 def start_meta (self , attrs ):
15961594 """Beautiful Soup can detect a charset included in a META tag,
@@ -1934,9 +1932,9 @@ def _detectEncoding(self, xml_data, isHTML=False):
19341932 except :
19351933 xml_encoding_match = None
19361934 xml_encoding_match = re .compile (
1937- '^<\?.*encoding=[\'"](.*?)[\'"].*\?>' ).match (xml_data )
1935+ r'^<\?.*encoding=[\'"](.*?)[\'"].*\?>' ).match (xml_data )
19381936 if not xml_encoding_match and isHTML :
1939- regexp = re .compile ('<\s*meta[^>]+charset=([^>]*?)[;\'">]' , re .I )
1937+ regexp = re .compile (r'<\s*meta[^>]+charset=([^>]*?)[;\'">]' , re .I )
19401938 xml_encoding_match = regexp .search (xml_data )
19411939 if xml_encoding_match is not None :
19421940 xml_encoding = xml_encoding_match .groups ()[0 ].lower ()
0 commit comments