From cd8b23154632309fe2b080765db501f3000261d3 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 16 Mar 2019 12:18:31 +0200 Subject: [PATCH 1/7] bpo-35100: Improve the urllib.parse module * Add unquote_to_bytes_plus to urllib.parse module --- Doc/library/urllib.parse.rst | 10 ++++++++++ Lib/test/test_urlparse.py | 6 ++++++ Lib/urllib/parse.py | 14 ++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index af15f5bbfff3a2..2c1c3ace050903 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -575,6 +575,16 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote_to_bytes('a%26%EF')`` yields ``b'a&\xef'``. +.. function:: unquote_to_bytes_plus(string) + + Like :func:`unquote_to_bytes`, but also replace plus signs by spaces, as required for + unquoting HTML form values. + + *string* must be a :class:`str`. + + Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Niño/'``. + + .. 
function:: urlencode(query, doseq=False, safe='', encoding=None, \ errors=None, quote_via=quote_plus) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 0faf2bbb645924..a3d5f0367affe9 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -925,6 +925,12 @@ def test_unquote_to_bytes(self): result = urllib.parse.unquote_to_bytes('') self.assertEqual(result, b'') + def test_unquote_to_bytes_plus(self): + result = urllib.parse.unquote_to_bytes_plus('abc%20def') + self.assertEqual(result, b'abc def') + result = urllib.parse.unquote_to_bytes_plus('abc+def') + self.assertEqual(result, b'abc def') + def test_quote_errors(self): self.assertRaises(TypeError, urllib.parse.quote, b'foo', encoding='utf-8') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 8b6c9b10609152..f7d3a5f48a91fc 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -36,8 +36,9 @@ "urlsplit", "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote", "quote_plus", "quote_from_bytes", "unquote", "unquote_plus", "unquote_to_bytes", - "DefragResult", "ParseResult", "SplitResult", - "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] + "unquote_to_bytes_plus", "DefragResult", "ParseResult", + "SplitResult", "DefragResultBytes", "ParseResultBytes", + "SplitResultBytes"] # A classification of schemes. # The empty string classifies URLs with no scheme specified, @@ -752,6 +753,15 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): string = string.replace('+', ' ') return unquote(string, encoding, errors) +def unquote_to_bytes_plus(string): + """Like unquote_to_bytes(), but also replace plus signs by spaces, as + required for unquoting HTML form values. 
+ + unquote_to_bytes_plus('abc+def') -> b'abc def' + """ + string = string.replace('+', ' ') + return unquote_to_bytes(string) + _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' b'abcdefghijklmnopqrstuvwxyz' b'0123456789' From bb4eb93232e71ceb6cf1ae50c2cdebfc73f01e23 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 18 Mar 2019 19:28:12 +0200 Subject: [PATCH 2/7] fix: address review comments --- Doc/library/urllib.parse.rst | 6 +++--- Lib/urllib/parse.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 2c1c3ace050903..d5f60a454dbd4b 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -577,12 +577,12 @@ task isn't already covered by the URL parsing functions above. .. function:: unquote_to_bytes_plus(string) - Like :func:`unquote_to_bytes`, but also replace plus signs by spaces, as required for - unquoting HTML form values. + Like :func:`unquote_to_bytes`, but also replace plus signs with spaces, as + required for unquoting HTML form values. *string* must be a :class:`str`. - Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Niño/'``. + Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Ni\xc3\xb1o/'``. .. function:: urlencode(query, doseq=False, safe='', encoding=None, \ diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f7d3a5f48a91fc..ccc603f80207cd 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -754,10 +754,10 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): return unquote(string, encoding, errors) def unquote_to_bytes_plus(string): - """Like unquote_to_bytes(), but also replace plus signs by spaces, as + """Like unquote_to_bytes(), but also replace plus signs with spaces, as required for unquoting HTML form values.
- unquote_to_bytes_plus('abc+def') -> b'abc def' + unquote_to_bytes_plus('%7e/abc+def') -> b'~/abc def' """ string = string.replace('+', ' ') return unquote_to_bytes(string) From 9f8867d8d234059d3e91ebb053f4c65d31f69d9e Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 19 Mar 2019 10:56:39 +0200 Subject: [PATCH 3/7] fix: remove trailing whitespace --- Doc/library/urllib.parse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index d5f60a454dbd4b..8752693f562a36 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -577,7 +577,7 @@ task isn't already covered by the URL parsing functions above. .. function:: unquote_to_bytes_plus(string) - Like :func:`unquote_to_bytes`, but also replace plus signs with spaces, as + Like :func:`unquote_to_bytes`, but also replace plus signs with spaces, as required for unquoting HTML form values. *string* must be a :class:`str`. From a184fb1562c966dcfd488c0da2f62d2eb6fc090a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" Date: Mon, 10 Jun 2019 20:20:14 +0000 Subject: [PATCH 4/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst diff --git a/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst new file mode 100644 index 00000000000000..ad16b58a2a7cc2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst @@ -0,0 +1 @@ +Add new :func: `unquote_to_bytes_plus` to the urllib.parse module. Patch contributed by Andrew Gates.
\ No newline at end of file From ffea7010cc86f3a70616684511610943b2fe572b Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 10 Jun 2019 22:22:53 +0200 Subject: [PATCH 5/7] doc: add versionadded directive to docs --- Doc/library/urllib.parse.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 78227f9d0f7096..4e95b17a0b29af 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -612,7 +612,8 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Ni\xc3\xb1o/'``. - + .. versionadded:: 3.9 + .. function:: urlencode(query, doseq=False, safe='', encoding=None, \ errors=None, quote_via=quote_plus) From 7174554b81da7577f34a669dc063aebde21b0281 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 11 Jun 2019 12:35:00 +0200 Subject: [PATCH 6/7] doc: fix pesky trailing spaces --- Doc/library/urllib.parse.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 4e95b17a0b29af..f86892e338b9c7 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -613,7 +613,8 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Ni\xc3\xb1o/'``. .. versionadded:: 3.9 - + + ..
function:: urlencode(query, doseq=False, safe='', encoding=None, \ errors=None, quote_via=quote_plus) From e804eb8f8729821e86fff950946e40117a2cb8a4 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 11 Jun 2019 14:00:18 +0200 Subject: [PATCH 7/7] doc: fix whitespace issues in news entry --- .../next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst index ad16b58a2a7cc2..1f2288463ec79d 100644 --- a/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst +++ b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst @@ -1 +1 @@ -Add new :func: `unquote_to_bytes_plus` to the urllib.parse module. Patch contributed by Andrew Gates. \ No newline at end of file +Add :func:`urllib.parse.unquote_to_bytes_plus`. Patch contributed by Andrew Gates. \ No newline at end of file