Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 79fca8e

Browse files
committed
Fix for an Issue #268
1 parent 8410fc5 commit 79fca8e

3 files changed

Lines changed: 5 additions & 5 deletions

File tree

lib/core/settings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,10 @@
222222
)
223223

224224
# Regular expression used for parsing charset info from meta html headers
225-
META_CHARSET_REGEX = r'<meta http-equiv="?content-type"?[^>]+charset=(?P<result>[^">]+)'
225+
META_CHARSET_REGEX = r'(?si)<head>.*<meta http-equiv="?content-type"?[^>]+charset=(?P<result>[^">]+).*</head>'
226226

227227
# Regular expression used for parsing refresh info from meta html headers
228-
META_REFRESH_REGEX = r'<meta http-equiv="?refresh"?[^>]+content="?[^">]+url=(?P<result>[^">]+)'
228+
META_REFRESH_REGEX = r'(?si)<head>.*<meta http-equiv="?refresh"?[^>]+content="?[^">]+url=(?P<result>[^">]+).*</head>'
229229

230230
# Regular expression used for parsing empty fields in tested form data
231231
EMPTY_FORM_FIELDS_REGEX = r'(&|\A)(?P<result>[^=]+=(&|\Z))'

lib/request/basic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def decodePage(page, contentEncoding, contentType):
220220
if contentType and (contentType.find("charset=") != -1):
221221
httpCharset = checkCharEncoding(contentType.split("charset=")[-1])
222222

223-
metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))
223+
metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))
224224

225225
if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\
226226
or (httpCharset == metaCharset and all([httpCharset, metaCharset])):

lib/request/connect.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,8 @@ def getPage(**kwargs):
372372
page = decodePage(page, responseHeaders.get(HTTPHEADER.CONTENT_ENCODING), responseHeaders.get(HTTPHEADER.CONTENT_TYPE))
373373
status = getUnicode(conn.msg)
374374

375-
if extractRegexResult(META_REFRESH_REGEX, page, re.DOTALL | re.IGNORECASE) and not refreshing:
376-
url = extractRegexResult(META_REFRESH_REGEX, page, re.DOTALL | re.IGNORECASE)
375+
if extractRegexResult(META_REFRESH_REGEX, page) and not refreshing:
376+
url = extractRegexResult(META_REFRESH_REGEX, page)
377377

378378
debugMsg = "got HTML meta refresh header"
379379
logger.debug(debugMsg)

0 commit comments

Comments
 (0)