Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 97aa88a

Browse files
gh-66543: Add mimetypes.guess_file_type()
1 parent 9654daf commit 97aa88a

File tree

8 files changed

+127
-35
lines changed

8 files changed

+127
-35
lines changed

Doc/includes/email-dir.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def main():
5353
# Guess the content type based on the file's extension. Encoding
5454
# will be ignored, although we should check for simple things like
5555
# gzip'd or compressed files.
56-
ctype, encoding = mimetypes.guess_type(path)
56+
ctype, encoding = mimetypes.guess_file_type(path)
5757
if ctype is None or encoding is not None:
5858
# No guess could be made, or the file is encoded (compressed), so
5959
# use a generic bag-of-bits type.

Doc/library/mimetypes.rst

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,22 @@ the information :func:`init` sets up.
5252
are also recognized.
5353

5454
.. versionchanged:: 3.8
55-
Added support for url being a :term:`path-like object`.
55+
Added support for *url* being a :term:`path-like object`.
56+
57+
.. versionchanged:: 3.13
58+
Passing a file path instead of a URL is deprecated.
59+
Use :func:`guess_file_type` for this.
60+
61+
62+
.. function:: guess_file_type(path, *, strict=True)
63+
64+
.. index:: pair: MIME; headers
65+
66+
Guess the type of a file based on its path, given by *path*.
67+
Similar to the :func:`guess_type` function, but accepts a path instead of a URL.
68+
Path can be a string, a bytes object or a :term:`path-like object`.
69+
70+
.. versionadded:: 3.13
5671

5772

5873
.. function:: guess_all_extensions(type, strict=True)
@@ -61,7 +76,7 @@ the information :func:`init` sets up.
6176
return value is a list of strings giving all possible filename extensions,
6277
including the leading dot (``'.'``). The extensions are not guaranteed to have
6378
been associated with any particular data stream, but would be mapped to the MIME
64-
type *type* by :func:`guess_type`.
79+
type *type* by :func:`guess_type` and :func:`guess_file_type`.
6580

6681
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
6782

@@ -72,8 +87,8 @@ the information :func:`init` sets up.
7287
return value is a string giving a filename extension, including the leading dot
7388
(``'.'``). The extension is not guaranteed to have been associated with any
7489
particular data stream, but would be mapped to the MIME type *type* by
75-
:func:`guess_type`. If no extension can be guessed for *type*, ``None`` is
76-
returned.
90+
:func:`guess_type` and :func:`guess_file_type`.
91+
If no extension can be guessed for *type*, ``None`` is returned.
7792

7893
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
7994

@@ -238,6 +253,14 @@ than one MIME-type database; it provides an interface similar to the one of the
238253
the object.
239254

240255

256+
.. method:: MimeTypes.guess_file_type(path, *, strict=True)
257+
258+
Similar to the :func:`guess_file_type` function, using the tables stored
259+
as part of the object.
260+
261+
.. versionadded:: 3.13
262+
263+
241264
.. method:: MimeTypes.guess_all_extensions(type, strict=True)
242265

243266
Similar to the :func:`guess_all_extensions` function, using the tables stored

Doc/library/wsgiref.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ directory and port number (default: 8000) on the command line::
865865
fn = os.path.join(path, environ["PATH_INFO"][1:])
866866
if "." not in fn.split(os.path.sep)[-1]:
867867
fn = os.path.join(fn, "index.html")
868-
mime_type = mimetypes.guess_type(fn)[0]
868+
mime_type = mimetypes.guess_file_type(fn)[0]
869869

870870
# Return 200 OK if file exists, otherwise 404 Not Found
871871
if os.path.exists(fn):

Doc/whatsnew/3.13.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,13 @@ provided by C99, and follows the specification of the IEEE 754
430430
"fusedMultiplyAdd" operation for special cases.
431431
(Contributed by Mark Dickinson and Victor Stinner in :gh:`73468`.)
432432

433+
mimetypes
434+
---------
435+
436+
* Add the :func:`~mimetypes.guess_file_type` function which works with file paths.
437+
Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is deprecated.
438+
(Contributed by Serhiy Storchaka in :gh:`66543`.)
439+
433440
mmap
434441
----
435442

@@ -885,6 +892,10 @@ Deprecated
885892
and was only useful for Jython support.
886893
(Contributed by Nikita Sobolev in :gh:`116349`.)
887894

895+
* Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is deprecated.
896+
Use :func:`~mimetypes.guess_file_type` instead.
897+
(Contributed by Serhiy Storchaka in :gh:`66543`.)
898+
888899
Pending Removal in Python 3.14
889900
------------------------------
890901

Lib/http/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,7 @@ def guess_type(self, path):
897897
ext = ext.lower()
898898
if ext in self.extensions_map:
899899
return self.extensions_map[ext]
900-
guess, _ = mimetypes.guess_type(path)
900+
guess, _ = mimetypes.guess_file_type(path)
901901
if guess:
902902
return guess
903903
return 'application/octet-stream'

Lib/mimetypes.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
__all__ = [
4242
"knownfiles", "inited", "MimeTypes",
43-
"guess_type", "guess_all_extensions", "guess_extension",
43+
"guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
4444
"add_type", "init", "read_mime_types",
4545
"suffix_map", "encodings_map", "types_map", "common_types"
4646
]
@@ -119,14 +119,14 @@ def guess_type(self, url, strict=True):
119119
Optional `strict' argument when False adds a bunch of commonly found,
120120
but non-standard types.
121121
"""
122+
# TODO: Deprecate accepting file paths (in particular path-like objects).
122123
url = os.fspath(url)
123124
p = urllib.parse.urlparse(url)
124125
if p.scheme and len(p.scheme) > 1:
125126
scheme = p.scheme
126127
url = p.path
127128
else:
128-
scheme = None
129-
url = os.path.splitdrive(url)[1]
129+
return self.guess_file_type(url, strict=strict)
130130
if scheme == 'data':
131131
# syntax of data URLs:
132132
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
@@ -146,13 +146,25 @@ def guess_type(self, url, strict=True):
146146
if '=' in type or '/' not in type:
147147
type = 'text/plain'
148148
return type, None # never compressed, so encoding is None
149-
base, ext = posixpath.splitext(url)
149+
return self._guess_file_type(url, strict, posixpath.splitext)
150+
151+
def guess_file_type(self, path, *, strict=True):
152+
"""Guess the type of a file based on its path.
153+
154+
Similar to guess_type(), but takes a file path instead of a URL.
155+
"""
156+
path = os.fsdecode(path)
157+
path = os.path.splitdrive(path)[1]
158+
return self._guess_file_type(path, strict, os.path.splitext)
159+
160+
def _guess_file_type(self, path, strict, splitext):
161+
base, ext = splitext(path)
150162
while (ext_lower := ext.lower()) in self.suffix_map:
151-
base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
163+
base, ext = splitext(base + self.suffix_map[ext_lower])
152164
# encodings_map is case sensitive
153165
if ext in self.encodings_map:
154166
encoding = self.encodings_map[ext]
155-
base, ext = posixpath.splitext(base)
167+
base, ext = splitext(base)
156168
else:
157169
encoding = None
158170
ext = ext.lower()
@@ -310,6 +322,16 @@ def guess_type(url, strict=True):
310322
return _db.guess_type(url, strict)
311323

312324

325+
def guess_file_type(path, *, strict=True):
326+
"""Guess the type of a file based on its path.
327+
328+
Similar to guess_type(), but takes a file path instead of a URL.
329+
"""
330+
if _db is None:
331+
init()
332+
return _db.guess_file_type(path, strict=strict)
333+
334+
313335
def guess_all_extensions(type, strict=True):
314336
"""Guess the extensions for a file based on its MIME type.
315337

Lib/test/test_mimetypes.py

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,28 @@ def setUp(self):
3636

3737
def test_case_sensitivity(self):
3838
eq = self.assertEqual
39-
eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))
40-
eq(self.db.guess_type("foobar.TGZ"), self.db.guess_type("foobar.tgz"))
41-
eq(self.db.guess_type("foobar.tar.Z"), ("application/x-tar", "compress"))
42-
eq(self.db.guess_type("foobar.tar.z"), (None, None))
39+
eq(self.db.guess_file_type("foobar.html"), ("text/html", None))
40+
eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None))
41+
eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None))
42+
eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None))
43+
eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip"))
44+
eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip"))
45+
eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip"))
46+
eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip"))
47+
eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress"))
48+
eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress"))
49+
eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
50+
eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
4351

4452
def test_default_data(self):
4553
eq = self.assertEqual
46-
eq(self.db.guess_type("foo.html"), ("text/html", None))
47-
eq(self.db.guess_type("foo.HTML"), ("text/html", None))
48-
eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip"))
49-
eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip"))
50-
eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress"))
51-
eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
52-
eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz"))
54+
eq(self.db.guess_file_type("foo.html"), ("text/html", None))
55+
eq(self.db.guess_file_type("foo.HTML"), ("text/html", None))
56+
eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip"))
57+
eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip"))
58+
eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress"))
59+
eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
60+
eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz"))
5361

5462
def test_data_urls(self):
5563
eq = self.assertEqual
@@ -63,7 +71,7 @@ def test_file_parsing(self):
6371
eq = self.assertEqual
6472
sio = io.StringIO("x-application/x-unittest pyunit\n")
6573
self.db.readfp(sio)
66-
eq(self.db.guess_type("foo.pyunit"),
74+
eq(self.db.guess_file_type("foo.pyunit"),
6775
("x-application/x-unittest", None))
6876
eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit")
6977

@@ -95,12 +103,12 @@ def test_read_mime_types(self):
95103
def test_non_standard_types(self):
96104
eq = self.assertEqual
97105
# First try strict
98-
eq(self.db.guess_type('foo.xul', strict=True), (None, None))
106+
eq(self.db.guess_file_type('foo.xul', strict=True), (None, None))
99107
eq(self.db.guess_extension('image/jpg', strict=True), None)
100108
# And then non-strict
101-
eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None))
102-
eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None))
103-
eq(self.db.guess_type('foo.invalid', strict=False), (None, None))
109+
eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None))
110+
eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None))
111+
eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None))
104112
eq(self.db.guess_extension('image/jpg', strict=False), '.jpg')
105113
eq(self.db.guess_extension('image/JPG', strict=False), '.jpg')
106114

@@ -124,15 +132,24 @@ def test_filename_with_url_delimiters(self):
124132
'//share/server/', '\\\\share\\server\\'):
125133
path = prefix + name
126134
with self.subTest(path=path):
135+
eq(self.db.guess_file_type(path), gzip_expected)
127136
eq(self.db.guess_type(path), gzip_expected)
128137
expected = (None, None) if os.name == 'nt' else gzip_expected
129138
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
130139
path = prefix + name
131140
with self.subTest(path=path):
141+
eq(self.db.guess_file_type(path), expected)
132142
eq(self.db.guess_type(path), expected)
143+
eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
133144
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
145+
eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip'))
146+
eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip'))
147+
eq(self.db.guess_file_type(r'foo\.tar.gz'),
148+
(None, 'gzip') if os.name == 'nt' else gzip_expected)
149+
eq(self.db.guess_type(r'foo\.tar.gz'), gzip_expected)
134150

135151
def test_url(self):
152+
result = self.db.guess_type('http://example.com/host.html')
136153
result = self.db.guess_type('http://host.html')
137154
msg = 'URL only has a host name, not a file'
138155
self.assertSequenceEqual(result, (None, None), msg)
@@ -240,22 +257,38 @@ def test_init_stability(self):
240257

241258
def test_path_like_ob(self):
242259
filename = "LICENSE.txt"
243-
filepath = pathlib.Path(filename)
244-
filepath_with_abs_dir = pathlib.Path('/dir/'+filename)
245-
filepath_relative = pathlib.Path('../dir/'+filename)
246-
path_dir = pathlib.Path('./')
260+
filepath = os_helper.FakePath(filename)
261+
filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename)
262+
filepath_relative = os_helper.FakePath('../dir/'+filename)
263+
path_dir = os_helper.FakePath('./')
247264

248-
expected = self.db.guess_type(filename)
265+
expected = self.db.guess_file_type(filename)
249266

267+
self.assertEqual(self.db.guess_file_type(filepath), expected)
250268
self.assertEqual(self.db.guess_type(filepath), expected)
269+
self.assertEqual(self.db.guess_file_type(
270+
filepath_with_abs_dir), expected)
251271
self.assertEqual(self.db.guess_type(
252272
filepath_with_abs_dir), expected)
273+
self.assertEqual(self.db.guess_file_type(filepath_relative), expected)
253274
self.assertEqual(self.db.guess_type(filepath_relative), expected)
275+
276+
self.assertEqual(self.db.guess_file_type(path_dir), (None, None))
254277
self.assertEqual(self.db.guess_type(path_dir), (None, None))
255278

279+
def test_bytes_path(self):
280+
self.assertEqual(self.db.guess_file_type(b'foo.html'),
281+
self.db.guess_file_type('foo.html'))
282+
self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'),
283+
self.db.guess_file_type('foo.tar.gz'))
284+
self.assertEqual(self.db.guess_file_type(b'foo.tgz'),
285+
self.db.guess_file_type('foo.tgz'))
286+
256287
def test_keywords_args_api(self):
288+
self.assertEqual(self.db.guess_file_type(
289+
path="foo.html", strict=True), ("text/html", None))
257290
self.assertEqual(self.db.guess_type(
258-
url="foo.html", strict=True), ("text/html", None))
291+
url="scheme:foo.html", strict=True), ("text/html", None))
259292
self.assertEqual(self.db.guess_all_extensions(
260293
type='image/jpg', strict=True), [])
261294
self.assertEqual(self.db.guess_extension(
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add the :func:`mimetypes.guess_file_type` function which works with file
2+
paths. Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is
3+
deprecated.

0 commit comments

Comments
 (0)