diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 49276daa7ff43f..f86892e338b9c7 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -603,6 +603,18 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote_to_bytes('a%26%EF')`` yields ``b'a&\xef'``. +.. function:: unquote_to_bytes_plus(string) + + Like :func:`unquote_to_bytes`, but also replace plus signs with spaces, as + required for unquoting HTML form values. + + *string* must be a :class:`str`. + + Example: ``unquote_to_bytes_plus('/El+Ni%C3%B1o/')`` yields ``b'/El Ni\xc3\xb1o/'``. + + .. versionadded:: 3.9 + + .. function:: urlencode(query, doseq=False, safe='', encoding=None, \ errors=None, quote_via=quote_plus) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4ae6ed33858ce2..242293b2684a03 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -925,6 +925,12 @@ def test_unquote_to_bytes(self): result = urllib.parse.unquote_to_bytes('') self.assertEqual(result, b'') + def test_unquote_to_bytes_plus(self): + result = urllib.parse.unquote_to_bytes_plus('abc%20def') + self.assertEqual(result, b'abc def') + result = urllib.parse.unquote_to_bytes_plus('abc+def') + self.assertEqual(result, b'abc def') + def test_quote_errors(self): self.assertRaises(TypeError, urllib.parse.quote, b'foo', encoding='utf-8') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b6608783a89471..58e9775e987c18 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -36,8 +36,9 @@ "urlsplit", "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote", "quote_plus", "quote_from_bytes", "unquote", "unquote_plus", "unquote_to_bytes", - "DefragResult", "ParseResult", "SplitResult", - "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] + "unquote_to_bytes_plus", "DefragResult", "ParseResult", + "SplitResult", "DefragResultBytes", "ParseResultBytes", + "SplitResultBytes"] # A classification of 
schemes. # The empty string classifies URLs with no scheme specified, @@ -755,6 +756,15 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): string = string.replace('+', ' ') return unquote(string, encoding, errors) +def unquote_to_bytes_plus(string): + """Like unquote_to_bytes(), but also replace plus signs with spaces, as + required for unquoting HTML form values. + + unquote_to_bytes_plus('%7e/abc+def') -> b'~/abc def' + """ + string = string.replace('+', ' ') + return unquote_to_bytes(string) + _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' b'abcdefghijklmnopqrstuvwxyz' b'0123456789' diff --git a/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst new file mode 100644 index 00000000000000..1f2288463ec79d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-06-10-20-20-11.bpo-35100.kVNUfa.rst @@ -0,0 +1 @@ +Add :func:`urllib.parse.unquote_to_bytes_plus`. Patch contributed by Andrew Gates. \ No newline at end of file