mime types guesser

gvanrossum · gvanrossum · commit d7e4705d8fd5 · 1997-01-30T02:44:20.000Z
diff --git a/Tools/webchecker/mimetypes.py b/Tools/webchecker/mimetypes.py
@@ -0,0 +1,190 @@
+"""Guess the MIME type of a file.
+
+This module defines one useful function:
+
+guess_type(url) -- guess the MIME type and encoding of a URL.
+
+It also contains the following, for tuning the behavior:
+
+Data:
+
+knownfiles -- list of files to parse
+inited -- flag set when init() has been called
+suffix_map -- dictionary mapping suffixes to suffixes
+encodings_map -- dictionary mapping suffixes to encodings
+types_map -- dictionary mapping suffixes to types
+
+Functions:
+
+init([files]) -- parse a list of files, default knownfiles
+read_mime_types(file) -- parse one file, return a dictionary or None
+
+"""
+
+import string
+import posixpath
+
+# Default mime.types files parsed by init() when it is called without
+# an explicit list.  Files that cannot be opened are skipped.
+knownfiles = [
+    "/usr/local/etc/httpd/conf/mime.types",
+    "/usr/local/lib/netscape/mime.types",
+    ]
+
+# Set to 1 by init(); guess_type() calls init() while this is still 0.
+inited = 0
+
+def guess_type(url):
+    """Guess the type of a file based on its URL.
+
+    Return value is a tuple (type, encoding) where type is None if the
+    type can't be guessed (no or unknown suffix) or a string of the
+    form type/subtype, usable for a MIME Content-type header; and
+    encoding is None for no encoding or the name of the program used
+    to encode (e.g. compress or gzip).  The mappings are table
+    driven.  Encoding suffixes are case sensitive; type suffixes are
+    first tried case sensitive, then case insensitive.
+
+    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
+    to ".tar.gz".  (This is table-driven too, using the dictionary
+    suffix_map).
+
+    """
+    # Load the mime.types files on first use.
+    if not inited:
+	init()
+    base, ext = posixpath.splitext(url)
+    # Expand shorthand suffixes (e.g. .tgz -> .tar.gz) and split again,
+    # so ext becomes the encoding suffix and base keeps the type suffix.
+    while suffix_map.has_key(ext):
+	base, ext = posixpath.splitext(base + suffix_map[ext])
+    # Strip one encoding suffix, if present, to expose the type suffix.
+    if encodings_map.has_key(ext):
+	encoding = encodings_map[ext]
+	base, ext = posixpath.splitext(base)
+    else:
+	encoding = None
+    # Try the suffix exactly as given, then fall back to lower case.
+    if types_map.has_key(ext):
+	return types_map[ext], encoding
+    elif types_map.has_key(string.lower(ext)):
+	return types_map[string.lower(ext)], encoding
+    else:
+	return None, encoding
+
+def init(files=None):
+    """Merge suffix-to-type entries from mime.types files into types_map.
+
+    files is a list of file names to parse; when it is omitted (or
+    empty) the module-level list knownfiles is used.  Files for which
+    read_mime_types() returns None or an empty dictionary contribute
+    nothing.  An entry read from a file replaces any existing
+    types_map entry for the same suffix.  The global flag inited is
+    set to 1 when done.
+
+    """
+    global inited
+    for file in files or knownfiles:
+	s = read_mime_types(file)
+	if s:
+	    for key, value in s.items():
+		types_map[key] = value
+    inited = 1
+
+def read_mime_types(file):
+    """Parse one mime.types file.
+
+    file is the name of the file to read.  Return a dictionary mapping
+    each suffix (with a leading '.') to its type, or None if the file
+    cannot be opened.
+
+    Each line is split on whitespace: the first word is the type and
+    the remaining words are its suffixes.  A word starting with '#'
+    begins a comment that runs to the end of the line.
+
+    """
+    try:
+	f = open(file)
+    except IOError:
+	return None
+    map = {}
+    while 1:
+	line = f.readline()
+	if not line: break
+	words = string.split(line)
+	# Drop the first word starting with '#' and everything after it.
+	for i in range(len(words)):
+	    if words[i][0] == '#':
+		del words[i:]
+		break
+	# Nothing left: the line was blank or held only a comment.
+	if not words: continue
+	type, suffixes = words[0], words[1:]
+	for suff in suffixes:
+	    map['.'+suff] = type
+    f.close()
+    return map
+
+# Shorthand suffixes that guess_type() expands before looking up the
+# encoding and the type.  Keys are matched case sensitively.
+suffix_map = {
+    '.tgz': '.tar.gz',
+    '.taz': '.tar.gz',
+    '.tz': '.tar.gz',
+}
+
+# Maps an encoding suffix to the name of the encoding.  guess_type()
+# strips a matching suffix (case sensitive) before the type lookup.
+encodings_map = {
+    '.gz': 'gzip',
+    '.Z': 'compress',
+    }
+
+# Built-in table mapping a suffix (with leading '.') to a MIME type.
+# init() adds entries read from mime.types files and may replace the
+# ones listed here.  guess_type() looks a suffix up as given first,
+# then lowercased.
+types_map = {
+    '.a': 'application/octet-stream',
+    '.ai': 'application/postscript',
+    '.aif': 'audio/x-aiff',
+    '.aifc': 'audio/x-aiff',
+    '.aiff': 'audio/x-aiff',
+    '.au': 'audio/basic',
+    '.avi': 'video/x-msvideo',
+    '.bcpio': 'application/x-bcpio',
+    '.bin': 'application/octet-stream',
+    '.cdf': 'application/x-netcdf',
+    '.cpio': 'application/x-cpio',
+    '.csh': 'application/x-csh',
+    '.dll': 'application/octet-stream',
+    '.dvi': 'application/x-dvi',
+    '.exe': 'application/octet-stream',
+    '.eps': 'application/postscript',
+    '.etx': 'text/x-setext',
+    '.gif': 'image/gif',
+    '.gtar': 'application/x-gtar',
+    '.hdf': 'application/x-hdf',
+    '.htm': 'text/html',
+    '.html': 'text/html',
+    '.ief': 'image/ief',
+    '.jpe': 'image/jpeg',
+    '.jpeg': 'image/jpeg',
+    '.jpg': 'image/jpeg',
+    '.latex': 'application/x-latex',
+    '.man': 'application/x-troff-man',
+    '.me': 'application/x-troff-me',
+    '.mif': 'application/x-mif',
+    '.mov': 'video/quicktime',
+    '.movie': 'video/x-sgi-movie',
+    '.mpe': 'video/mpeg',
+    '.mpeg': 'video/mpeg',
+    '.mpg': 'video/mpeg',
+    '.ms': 'application/x-troff-ms',
+    '.nc': 'application/x-netcdf',
+    '.o': 'application/octet-stream',
+    '.obj': 'application/octet-stream',
+    '.oda': 'application/oda',
+    '.pbm': 'image/x-portable-bitmap',
+    '.pdf': 'application/pdf',
+    '.pgm': 'image/x-portable-graymap',
+    '.pnm': 'image/x-portable-anymap',
+    '.png': 'image/png',
+    '.ppm': 'image/x-portable-pixmap',
+    '.py': 'text/x-python',
+    '.pyc': 'application/x-python-code',
+    '.ps': 'application/postscript',
+    '.qt': 'video/quicktime',
+    '.ras': 'image/x-cmu-raster',
+    '.rgb': 'image/x-rgb',
+    '.roff': 'application/x-troff',
+    '.rtf': 'application/rtf',
+    '.rtx': 'text/richtext',
+    '.sgm': 'text/x-sgml',
+    '.sgml': 'text/x-sgml',
+    '.sh': 'application/x-sh',
+    '.shar': 'application/x-shar',
+    '.snd': 'audio/basic',
+    '.so': 'application/octet-stream',
+    '.src': 'application/x-wais-source',
+    '.sv4cpio': 'application/x-sv4cpio',
+    '.sv4crc': 'application/x-sv4crc',
+    '.t': 'application/x-troff',
+    '.tar': 'application/x-tar',
+    '.tcl': 'application/x-tcl',
+    '.tex': 'application/x-tex',
+    '.texi': 'application/x-texinfo',
+    '.texinfo': 'application/x-texinfo',
+    '.tif': 'image/tiff',
+    '.tiff': 'image/tiff',
+    '.tr': 'application/x-troff',
+    '.tsv': 'text/tab-separated-values',
+    '.txt': 'text/plain',
+    '.ustar': 'application/x-ustar',
+    '.wav': 'audio/x-wav',
+    '.xbm': 'image/x-xbitmap',
+    '.xpm': 'image/x-xpixmap',
+    '.xwd': 'image/x-xwindowdump',
+    '.zip': 'application/zip',
+    }