Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 36b1a20

Browse files
miss-islington, serhiy-storchaka, corona10
authored
[3.12] gh-66543: Fix mimetypes.guess_type() (GH-117217) (GH-117256)
Fix parsing of the following corner cases:

* URLs with only a host name
* URLs containing a fragment
* URLs containing a query
* filenames with only a UNC sharepoint on Windows

(cherry picked from commit 9654daf)

Co-authored-by: Serhiy Storchaka <[email protected]>
Co-authored-by: Dong-hee Na <[email protected]>
1 parent 5451446 commit 36b1a20

File tree

4 files changed

+45
-9
lines changed

4 files changed

+45
-9
lines changed

Lib/mimetypes.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,13 @@ def guess_type(self, url, strict=True):
120120
but non-standard types.
121121
"""
122122
url = os.fspath(url)
123-
scheme, url = urllib.parse._splittype(url)
123+
p = urllib.parse.urlparse(url)
124+
if p.scheme and len(p.scheme) > 1:
125+
scheme = p.scheme
126+
url = p.path
127+
else:
128+
scheme = None
129+
url = os.path.splitdrive(url)[1]
124130
if scheme == 'data':
125131
# syntax of data URLs:
126132
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data

Lib/test/test_mimetypes.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import io
22
import mimetypes
3+
import os
34
import pathlib
45
import sys
56
import unittest.mock
@@ -111,15 +112,40 @@ def test_filename_with_url_delimiters(self):
111112
# compared to when interpreted as filename because of the semicolon.
112113
eq = self.assertEqual
113114
gzip_expected = ('application/x-tar', 'gzip')
114-
eq(self.db.guess_type(";1.tar.gz"), gzip_expected)
115-
eq(self.db.guess_type("?1.tar.gz"), gzip_expected)
116-
eq(self.db.guess_type("#1.tar.gz"), gzip_expected)
117-
eq(self.db.guess_type("#1#.tar.gz"), gzip_expected)
118-
eq(self.db.guess_type(";1#.tar.gz"), gzip_expected)
119-
eq(self.db.guess_type(";&1=123;?.tar.gz"), gzip_expected)
120-
eq(self.db.guess_type("?k1=v1&k2=v2.tar.gz"), gzip_expected)
115+
for name in (
116+
';1.tar.gz',
117+
'?1.tar.gz',
118+
'#1.tar.gz',
119+
'#1#.tar.gz',
120+
';1#.tar.gz',
121+
';&1=123;?.tar.gz',
122+
'?k1=v1&k2=v2.tar.gz',
123+
):
124+
for prefix in ('', '/', '\\',
125+
'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\',
126+
'//share/server/', '\\\\share\\server\\'):
127+
path = prefix + name
128+
with self.subTest(path=path):
129+
eq(self.db.guess_type(path), gzip_expected)
130+
expected = (None, None) if os.name == 'nt' else gzip_expected
131+
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
132+
path = prefix + name
133+
with self.subTest(path=path):
134+
eq(self.db.guess_type(path), expected)
121135
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
122136

137+
def test_url(self):
138+
result = self.db.guess_type('http://host.html')
139+
msg = 'URL only has a host name, not a file'
140+
self.assertSequenceEqual(result, (None, None), msg)
141+
result = self.db.guess_type('http://example.com/host.html')
142+
msg = 'Should be text/html'
143+
self.assertSequenceEqual(result, ('text/html', None), msg)
144+
result = self.db.guess_type('http://example.com/host.html#x.tar')
145+
self.assertSequenceEqual(result, ('text/html', None))
146+
result = self.db.guess_type('http://example.com/host.html?q=x.tar')
147+
self.assertSequenceEqual(result, ('text/html', None))
148+
123149
def test_guess_all_types(self):
124150
# First try strict. Use a set here for testing the results because if
125151
# test_urllib2 is run before test_mimetypes, global state is modified

Lib/test/test_urllib2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ def connect_ftp(self, user, passwd, host, port, dirs,
776776
["foo", "bar"], "", None),
777777
("ftp://localhost/baz.gif;type=a",
778778
"localhost", ftplib.FTP_PORT, "", "", "A",
779-
[], "baz.gif", None), # XXX really this should guess image/gif
779+
[], "baz.gif", "image/gif"),
780780
]:
781781
req = Request(url)
782782
req.timeout = None
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Make :func:`mimetypes.guess_type` properly parse URLs with only a host
2+
name, URLs containing a fragment or query, and filenames with only a UNC
3+
sharepoint on Windows.
4+
Based on patch by Dong-hee Na.

0 commit comments

Comments
 (0)