99 QUOTE_MINIMAL , QUOTE_ALL , QUOTE_NONNUMERIC , QUOTE_NONE , \
1010 __doc__
1111
12+ try :
13+ from cStringIO import StringIO
14+ except ImportError :
15+ from StringIO import StringIO
16+
1217__all__ = [ "QUOTE_MINIMAL" , "QUOTE_ALL" , "QUOTE_NONNUMERIC" , "QUOTE_NONE" ,
1318 "Error" , "Dialect" , "excel" , "excel_tab" , "reader" , "writer" ,
1419 "register_dialect" , "get_dialect" , "list_dialects" , "Sniffer" ,
@@ -147,52 +152,39 @@ def writerows(self, rowdicts):
147152class Sniffer :
148153 '''
149154 "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
150- Returns a csv. Dialect object.
155+ Returns a Dialect object.
151156 '''
152- def __init__ (self , sample = 16 * 1024 ):
157+ def __init__ (self ):
153158 # in case there is more than one possible delimiter
154159 self .preferred = [',' , '\t ' , ';' , ' ' , ':' ]
155160
156- # amount of data (in bytes) to sample
157- self .sample = sample
158161
159-
160- def sniff (self , fileobj ):
162+ def sniff (self , sample ):
161163 """
162- Takes a file-like object and returns a dialect (or None)
164+ Returns a dialect (or None) corresponding to the sample
163165 """
164- self .fileobj = fileobj
165-
166- data = fileobj .read (self .sample )
167166
168167 quotechar , delimiter , skipinitialspace = \
169- self ._guessQuoteAndDelimiter ( data )
168+ self ._guess_quote_and_delimiter ( sample )
170169 if delimiter is None :
171- delimiter , skipinitialspace = self ._guessDelimiter ( data )
170+ delimiter , skipinitialspace = self ._guess_delimiter ( sample )
172171
173- class SniffedDialect (Dialect ):
172+ class dialect (Dialect ):
174173 _name = "sniffed"
175174 lineterminator = '\r \n '
176175 quoting = QUOTE_MINIMAL
177176 # escapechar = ''
178177 doublequote = False
179- SniffedDialect .delimiter = delimiter
180- SniffedDialect .quotechar = quotechar
181- SniffedDialect .skipinitialspace = skipinitialspace
182178
183- self .dialect = SniffedDialect
184- return self .dialect
179+ dialect .delimiter = delimiter
180+ # _csv.reader won't accept a quotechar of ''
181+ dialect .quotechar = quotechar or '"'
182+ dialect .skipinitialspace = skipinitialspace
185183
184+ return dialect
186185
187- def hasHeaders (self ):
188- return self ._hasHeaders (self .fileobj , self .dialect )
189186
190-
191- def register_dialect (self , name = 'sniffed' ):
192- register_dialect (name , self .dialect )
193-
194-
195- def _guessQuoteAndDelimiter (self , data ):
187+ def _guess_quote_and_delimiter (self , data ):
196188 """
197189 Looks for text enclosed between two identical quotes
198190 (the probable quotechar) which are preceded and followed
@@ -256,7 +248,7 @@ def _guessQuoteAndDelimiter(self, data):
256248 return (quotechar , delim , skipinitialspace )
257249
258250
259- def _guessDelimiter (self , data ):
251+ def _guess_delimiter (self , data ):
260252 """
261253 The delimiter /should/ occur the same number of times on
262254 each row. However, due to malformed data, it may not. We don't want
@@ -290,12 +282,12 @@ def _guessDelimiter(self, data):
290282 iteration += 1
291283 for line in data [start :end ]:
292284 for char in ascii :
293- metafrequency = charFrequency .get (char , {})
285+ metaFrequency = charFrequency .get (char , {})
294286 # must count even if frequency is 0
295287 freq = line .strip ().count (char )
296288 # value is the mode
297- metafrequency [freq ] = metafrequency .get (freq , 0 ) + 1
298- charFrequency [char ] = metafrequency
289+ metaFrequency [freq ] = metaFrequency .get (freq , 0 ) + 1
290+ charFrequency [char ] = metaFrequency
299291
300292 for char in charFrequency .keys ():
301293 items = charFrequency [char ].items ()
@@ -356,7 +348,7 @@ def _guessDelimiter(self, data):
356348 return (delim , skipinitialspace )
357349
358350
359- def _hasHeaders (self , fileobj , dialect ):
351+ def has_header (self , sample ):
360352 # Creates a dictionary of types of data in each column. If any
361353 # column is of a single type (say, integers), *except* for the first
362354 # row, then the first row is presumed to be labels. If the type
@@ -373,23 +365,16 @@ def seval(item):
373365 """
374366 return eval (item .replace ('(' , '' ).replace (')' , '' ))
375367
376- # rewind the fileobj - this might not work for some file-like
377- # objects...
378- fileobj .seek (0 )
379-
380- r = csv .reader (fileobj ,
381- delimiter = dialect .delimiter ,
382- quotechar = dialect .quotechar ,
383- skipinitialspace = dialect .skipinitialspace )
368+ rdr = reader (StringIO (sample ), self .sniff (sample ))
384369
385- header = r .next () # assume first row is header
370+ header = rdr .next () # assume first row is header
386371
387372 columns = len (header )
388373 columnTypes = {}
389374 for i in range (columns ): columnTypes [i ] = None
390375
391376 checked = 0
392- for row in r :
377+ for row in rdr :
393378 # arbitrary number of rows to check, to keep it sane
394379 if checked > 20 :
395380 break
0 commit comments