Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d7e4705

Browse files
committed
mime types guesser
1 parent fc167c6 commit d7e4705

1 file changed

Lines changed: 190 additions & 0 deletions

File tree

Tools/webchecker/mimetypes.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
"""Guess the MIME type of a file.
2+
3+
This module defines one useful function:
4+
5+
guess_type(url) -- guess the MIME type and encoding of a URL.
6+
7+
It also contains the following, for tuning the behavior:
8+
9+
Data:
10+
11+
knownfiles -- list of files to parse
12+
inited -- flag set when init() has been called
13+
suffix_map -- dictionary mapping suffixes to suffixes
14+
encodings_map -- dictionary mapping suffixes to encodings
15+
types_map -- dictionary mapping suffixes to types
16+
17+
Functions:
18+
19+
init([files]) -- parse a list of files, default knownfiles
20+
read_mime_types(file) -- parse one file, return a dictionary or None
21+
22+
"""
23+
24+
import string
25+
import posixpath
26+
27+
# Default list of mime.types files that init() parses when it is
# called without an explicit file list.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
]

# Becomes 1 once init() has run; guess_type() checks it to initialize lazily.
inited = 0
33+
34+
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    """
    # Load the external mime.types files lazily, on first use.
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split so
    # that ext becomes the last component of the expansion.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Peel off an encoding suffix, if any, to expose the real type suffix.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match wins; fall back to the lower-cased suffix.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
66+
67+
def init(files=None):
    """Merge MIME type tables read from files into types_map.

    files is a list of file names to parse; when it is omitted (or
    empty) the module-level list knownfiles is used instead.  Files
    for which read_mime_types() returns nothing are skipped.  Entries
    read from a file replace existing entries with the same suffix.
    On return the global flag inited is set to 1.
    """
    global inited
    sources = files or knownfiles
    for file in sources:
        parsed = read_mime_types(file)
        if not parsed:
            # Unreadable file (None) or a file without any entries.
            continue
        types_map.update(parsed)
    inited = 1
75+
76+
def read_mime_types(file):
    """Parse one mime.types-style file.

    Each line is split on whitespace.  A word that starts with '#'
    begins a comment: it and the rest of the line are ignored.  The
    first remaining word is the MIME type and every following word is
    a suffix (written without the leading dot) that maps to that type.
    Lines with no remaining words are skipped.

    file is the name of the file to read.

    Return a dictionary mapping '.' + suffix to the MIME type, or None
    if the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The context manager closes the file even if reading raises.
    with f:
        for line in f:
            words = line.split()
            # Drop the comment word and everything after it.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping
96+
97+
# Shorthand suffixes that guess_type() expands before looking anything up;
# every one of them stands for a gzip-compressed tar archive suffix.
suffix_map = dict.fromkeys(('.tgz', '.taz', '.tz'), '.tar.gz')
102+
103+
# Suffix -> name of the program used to encode the file.
# guess_type() looks these keys up with the suffix's exact case.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}
107+
108+
# Built-in suffix -> MIME type table.  init() merges entries read from
# mime.types files into this dictionary; guess_type() consults it first
# with the suffix as given and then with the suffix lower-cased.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
}

0 commit comments

Comments
 (0)