Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit df022da

Browse files
committed
Fix Issue5468 - urlencode to handle bytes and other alternate encodings.
(Extensive tests provided). Patch by Dan Mahn.
1 parent 8502933 commit df022da

4 files changed

Lines changed: 166 additions & 25 deletions

File tree

Doc/library/urllib.parse.rst

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -310,23 +310,29 @@ The :mod:`urllib.parse` module defines the following functions:
310310
``b'a&\xef'``.
311311

312312

313-
.. function:: urlencode(query, doseq=False)
314-
315-
Convert a mapping object or a sequence of two-element tuples to a
316-
"url-encoded" string, suitable to pass to :func:`urlopen` above as the
317-
optional *data* argument. This is useful to pass a dictionary of form
318-
fields to a ``POST`` request. The resulting string is a series of
319-
``key=value`` pairs separated by ``'&'`` characters, where both *key* and
320-
*value* are quoted using :func:`quote_plus` above. When a sequence of
321-
two-element tuples is used as the *query* argument, the first element of
322-
each tuple is a key and the second is a value. The value element in itself
323-
can be a sequence and in that case, if the optional parameter *doseq* is
324-
evaluates to *True*, individual ``key=value`` pairs separated by ``'&'`` are
325-
generated for each element of the value sequence for the key. The order of
326-
parameters in the encoded string will match the order of parameter tuples in
327-
the sequence. This module provides the functions :func:`parse_qs` and
328-
:func:`parse_qsl` which are used to parse query strings into Python data
329-
structures.
313+
.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
314+
315+
Convert a mapping object or a sequence of two-element tuples, which may
316+
contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string,
317+
suitable to pass to :func:`urlopen` above as the optional *data* argument.
318+
This is useful to pass a dictionary of form fields to a ``POST`` request.
319+
The resulting string is a series of ``key=value`` pairs separated by ``'&'``
320+
characters, where both *key* and *value* are quoted using :func:`quote_plus`
321+
above. When a sequence of two-element tuples is used as the *query*
322+
argument, the first element of each tuple is a key and the second is a
323+
value. The value element in itself can be a sequence and in that case, if
324+
the optional parameter *doseq* evaluates to *True*, individual
325+
``key=value`` pairs separated by ``'&'`` are generated for each element of
326+
the value sequence for the key. The order of parameters in the encoded
327+
string will match the order of parameter tuples in the sequence. This module
328+
provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
329+
to parse query strings into Python data structures.
330+
331+
When the *query* parameter is a :class:`str`, the *safe*, *encoding* and *errors*
332+
parameters are passed to :func:`quote_plus` for encoding.
333+
334+
.. versionchanged:: 3.2
335+
The *query* parameter supports both bytes and string objects.
330336

331337

332338
.. seealso::

Lib/test/test_urllib.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,116 @@ def test_nonstring_seq_values(self):
795795
self.assertEqual("a=a&a=b",
796796
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
797797

798+
def test_urlencode_encoding(self):
799+
# ASCII encoding. Expect %3F with errors="replace"
800+
given = (('\u00a0', '\u00c1'),)
801+
expect = '%3F=%3F'
802+
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
803+
self.assertEqual(expect, result)
804+
805+
# Default is UTF-8 encoding.
806+
given = (('\u00a0', '\u00c1'),)
807+
expect = '%C2%A0=%C3%81'
808+
result = urllib.parse.urlencode(given)
809+
self.assertEqual(expect, result)
810+
811+
# Latin-1 encoding.
812+
given = (('\u00a0', '\u00c1'),)
813+
expect = '%A0=%C1'
814+
result = urllib.parse.urlencode(given, encoding="latin-1")
815+
self.assertEqual(expect, result)
816+
817+
def test_urlencode_encoding_doseq(self):
818+
# ASCII Encoding. Expect %3F with errors="replace"
819+
given = (('\u00a0', '\u00c1'),)
820+
expect = '%3F=%3F'
821+
result = urllib.parse.urlencode(given, doseq=True,
822+
encoding="ASCII", errors="replace")
823+
self.assertEqual(expect, result)
824+
825+
# ASCII Encoding. On a sequence of values.
826+
given = (("\u00a0", (1, "\u00c1")),)
827+
expect = '%3F=1&%3F=%3F'
828+
result = urllib.parse.urlencode(given, True,
829+
encoding="ASCII", errors="replace")
830+
self.assertEqual(expect, result)
831+
832+
# Utf-8
833+
given = (("\u00a0", "\u00c1"),)
834+
expect = '%C2%A0=%C3%81'
835+
result = urllib.parse.urlencode(given, True)
836+
self.assertEqual(expect, result)
837+
838+
given = (("\u00a0", (42, "\u00c1")),)
839+
expect = '%C2%A0=42&%C2%A0=%C3%81'
840+
result = urllib.parse.urlencode(given, True)
841+
self.assertEqual(expect, result)
842+
843+
# latin-1
844+
given = (("\u00a0", "\u00c1"),)
845+
expect = '%A0=%C1'
846+
result = urllib.parse.urlencode(given, True, encoding="latin-1")
847+
self.assertEqual(expect, result)
848+
849+
given = (("\u00a0", (42, "\u00c1")),)
850+
expect = '%A0=42&%A0=%C1'
851+
result = urllib.parse.urlencode(given, True, encoding="latin-1")
852+
self.assertEqual(expect, result)
853+
854+
def test_urlencode_bytes(self):
855+
given = ((b'\xa0\x24', b'\xc1\x24'),)
856+
expect = '%A0%24=%C1%24'
857+
result = urllib.parse.urlencode(given)
858+
self.assertEqual(expect, result)
859+
result = urllib.parse.urlencode(given, True)
860+
self.assertEqual(expect, result)
861+
862+
# Sequence of values
863+
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
864+
expect = '%A0%24=42&%A0%24=%C1%24'
865+
result = urllib.parse.urlencode(given, True)
866+
self.assertEqual(expect, result)
867+
868+
def test_urlencode_encoding_safe_parameter(self):
869+
870+
# Send '$' (\x24) as safe character
871+
# Default utf-8 encoding
872+
873+
given = ((b'\xa0\x24', b'\xc1\x24'),)
874+
result = urllib.parse.urlencode(given, safe=":$")
875+
expect = '%A0$=%C1$'
876+
self.assertEqual(expect, result)
877+
878+
given = ((b'\xa0\x24', b'\xc1\x24'),)
879+
result = urllib.parse.urlencode(given, doseq=True, safe=":$")
880+
expect = '%A0$=%C1$'
881+
self.assertEqual(expect, result)
882+
883+
# Safe parameter in sequence
884+
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
885+
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
886+
result = urllib.parse.urlencode(given, True, safe=":$")
887+
self.assertEqual(expect, result)
888+
889+
# Test all above in latin-1 encoding
890+
891+
given = ((b'\xa0\x24', b'\xc1\x24'),)
892+
result = urllib.parse.urlencode(given, safe=":$",
893+
encoding="latin-1")
894+
expect = '%A0$=%C1$'
895+
self.assertEqual(expect, result)
896+
897+
given = ((b'\xa0\x24', b'\xc1\x24'),)
898+
expect = '%A0$=%C1$'
899+
result = urllib.parse.urlencode(given, doseq=True, safe=":$",
900+
encoding="latin-1")
901+
902+
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
903+
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
904+
result = urllib.parse.urlencode(given, True, safe=":$",
905+
encoding="latin-1")
906+
self.assertEqual(expect, result)
907+
798908
class Pathname_Tests(unittest.TestCase):
799909
"""Test pathname2url() and url2pathname()"""
800910

Lib/urllib/parse.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ def quote_from_bytes(bs, safe='/'):
559559
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__
560560
return ''.join([quoter(char) for char in bs])
561561

562-
def urlencode(query, doseq=False):
562+
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
563563
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
564564
565565
If any values in the query arg are sequences and doseq is true, each
@@ -568,6 +568,10 @@ def urlencode(query, doseq=False):
568568
If the query arg is a sequence of two-element tuples, the order of the
569569
parameters in the output will match the order of parameters in the
570570
input.
571+
572+
The query arg may be either a string or a bytes type. When query arg is a
573+
string, the safe, encoding and errors parameters are passed to quote_plus for
574+
encoding.
571575
"""
572576

573577
if hasattr(query, "items"):
@@ -592,27 +596,45 @@ def urlencode(query, doseq=False):
592596
l = []
593597
if not doseq:
594598
for k, v in query:
595-
k = quote_plus(str(k))
596-
v = quote_plus(str(v))
599+
if isinstance(k, bytes):
600+
k = quote_plus(k, safe)
601+
else:
602+
k = quote_plus(str(k), safe, encoding, errors)
603+
604+
if isinstance(v, bytes):
605+
v = quote_plus(v, safe)
606+
else:
607+
v = quote_plus(str(v), safe, encoding, errors)
597608
l.append(k + '=' + v)
598609
else:
599610
for k, v in query:
600-
k = quote_plus(str(k))
601-
if isinstance(v, str):
602-
v = quote_plus(v)
611+
if isinstance(k, bytes):
612+
k = quote_plus(k, safe)
613+
else:
614+
k = quote_plus(str(k), safe, encoding, errors)
615+
616+
if isinstance(v, bytes):
617+
v = quote_plus(v, safe)
618+
l.append(k + '=' + v)
619+
elif isinstance(v, str):
620+
v = quote_plus(v, safe, encoding, errors)
603621
l.append(k + '=' + v)
604622
else:
605623
try:
606624
# Is this a sufficient test for sequence-ness?
607625
x = len(v)
608626
except TypeError:
609627
# not a sequence
610-
v = quote_plus(str(v))
628+
v = quote_plus(str(v), safe, encoding, errors)
611629
l.append(k + '=' + v)
612630
else:
613631
# loop over the sequence
614632
for elt in v:
615-
l.append(k + '=' + quote_plus(str(elt)))
633+
if isinstance(elt, bytes):
634+
elt = quote_plus(elt, safe)
635+
else:
636+
elt = quote_plus(str(elt), safe, encoding, errors)
637+
l.append(k + '=' + elt)
616638
return '&'.join(l)
617639

618640
# Utilities to parse URLs (most of these return None for missing parts):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,9 @@ C-API
468468
Library
469469
-------
470470

471+
- Issue #5468: urlencode to handle bytes type and other encodings in its query
472+
parameter. Patch by Dan Mahn.
473+
471474
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
472475
module, ensure that the input string length is a multiple of the frame size
473476

0 commit comments

Comments
 (0)