From 6b942a6f5de4ec2ac8e0185b4917e72797fc4c1b Mon Sep 17 00:00:00 2001 From: Roaan Vos Date: Mon, 13 Jun 2022 14:46:59 +1000 Subject: [PATCH 1/3] gh-74668: Fix encoded unicode in url byte string --- Lib/test/test_urlparse.py | 2 ++ Lib/urllib/parse.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 2f629c72ae784e..626f93f457c345 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -36,6 +36,7 @@ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), (b";a=b", [(b';a', b'b')]), (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), + (b"a=a\xe2\x80\x99b", [(b'a', b'a\xe2\x80\x99b')]), ] # Each parse_qs testcase is a two-tuple that contains @@ -66,6 +67,7 @@ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), (b";a=b", {b';a': [b'b']}), (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), + (b"a=a\xe2\x80\x99b", {b'a': [b'a\xe2\x80\x99b']}), ] class UrlParseTestCase(unittest.TestCase): diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index d70a6943f0a739..0d46e7e6cb6bb6 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -107,7 +107,7 @@ def _decode_args(args, encoding=_implicit_encoding, errors=_implicit_errors): return tuple(x.decode(encoding, errors) if x else '' for x in args) -def _coerce_args(*args): +def _coerce_args(*args, encoding=_implicit_encoding): # Invokes decode if necessary to create str args # and returns the coerced inputs along with # an appropriate result coercion function @@ -121,7 +121,7 @@ def _coerce_args(*args): raise TypeError("Cannot mix str and non-str arguments") if str_input: return args + (_noop,) - return _decode_args(args) + (_encode_result,) + return _decode_args(args, encoding=encoding) + (functools.partial(_encode_result, encoding=encoding),) # Result objects are more helpful than simple tuples class _ResultMixinStr(object): @@ -730,8 +730,8 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, Returns a list, as G-d intended. 
""" - qs, _coerce_result = _coerce_args(qs) - separator, _ = _coerce_args(separator) + qs, _coerce_result = _coerce_args(qs, encoding=encoding) + separator, _ = _coerce_args(separator, encoding=encoding) if not separator or (not isinstance(separator, (str, bytes))): raise ValueError("Separator must be of type string or bytes.") From 6078fc5775a3eac709a65098a31499e76fde53c2 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 13 Jun 2022 05:32:29 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2022-06-13-05-32-29.gh-issue-74668.bArBQ1.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-06-13-05-32-29.gh-issue-74668.bArBQ1.rst diff --git a/Misc/NEWS.d/next/Library/2022-06-13-05-32-29.gh-issue-74668.bArBQ1.rst b/Misc/NEWS.d/next/Library/2022-06-13-05-32-29.gh-issue-74668.bArBQ1.rst new file mode 100644 index 00000000000000..8623b2d6db3327 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-06-13-05-32-29.gh-issue-74668.bArBQ1.rst @@ -0,0 +1 @@ +Fix :func:`urllib.parse.parse_qs` and :func:`urllib.parse.parse_qsl` to correctly parse query strings passed as bytes that contain percent-encoded non-ASCII characters.
From 15d1a2adb2209a48f81d309429323dec556eeef1 Mon Sep 17 00:00:00 2001 From: Roaan Vos Date: Mon, 13 Jun 2022 15:49:36 +1000 Subject: [PATCH 3/3] Updated test scenarios to correctly illustrate parsing encoded urls --- Lib/test/test_urlparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 626f93f457c345..11e90097885ccf 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -36,7 +36,7 @@ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), (b";a=b", [(b';a', b'b')]), (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), - (b"a=a\xe2\x80\x99b", [(b'a', b'a\xe2\x80\x99b')]), + (b"a=a%E2%80%99b", [(b'a', b'a\xe2\x80\x99b')]), ] # Each parse_qs testcase is a two-tuple that contains @@ -67,7 +67,7 @@ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), (b";a=b", {b';a': [b'b']}), (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), - (b"a=a\xe2\x80\x99b", {b'a': [b'a\xe2\x80\x99b']}), + (b"a=a%E2%80%99b", {b'a': [b'a\xe2\x80\x99b']}), ] class UrlParseTestCase(unittest.TestCase):