@@ -1923,287 +1923,6 @@ def extract(r):
1923
1923
return np .rec .fromrecords (results , names = names )
1924
1924
1925
1925
1926
@cbook.deprecated("2.2")
def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
            converterd=None, names=None, missing='', missingd=None,
            use_mrecords=False, dayfirst=False, yearfirst=False):
    """
    Load data from comma/space/tab delimited file in *fname* into a
    numpy record array and return the record array.

    If *names* is *None*, a header row is required to automatically
    assign the recarray names.  The headers will be lower cased,
    spaces will be converted to underscores, and illegal attribute
    name characters removed.  If *names* is not *None*, it is a
    sequence of names to use for the column names.  In this case, it
    is assumed there is no header row.


    - *fname*: can be a filename or a file handle.  Support for gzipped
      files is automatic, if the filename ends in '.gz'

    - *comments*: the character used to indicate the start of a comment
      in the file, or *None* to switch off the removal of comments

    - *skiprows*: is the number of rows from the top to skip

    - *checkrows*: is the number of rows to check to validate the column
      data type.  When set to zero all rows are validated.

    - *converterd*: if not *None*, is a dictionary mapping column number or
      munged column name to a converter function.

    - *names*: if not None, is a list of header names.  In this case, no
      header will be read from the file

    - *missingd* is a dictionary mapping munged column names to field values
      which signify that the field does not contain actual data and should
      be masked, e.g., '0000-00-00' or 'unused'

    - *missing*: a string whose value signals a missing field regardless of
      the column it appears in

    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
      any of the data are missing

    - *dayfirst*: default is False so that MM-DD-YY has precedence over
      DD-MM-YY.  See
      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
      for further information.

    - *yearfirst*: default is False so that MM-DD-YY has precedence over
      YY-MM-DD.  See
      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
      for further information.

    If every row after the header is empty, *None* is returned.

    Raises ValueError if no header row (when one is required) or no data
    row can be found in the file.
    """

    if converterd is None:
        converterd = dict()

    if missingd is None:
        missingd = {}

    import dateutil.parser
    import datetime

    # Remember whether we opened the handle ourselves so that, on failure,
    # a handle owned by the caller is left untouched.
    fh, opened = cbook.to_filehandle(fname, return_opened=True)

    delimiter = str(delimiter)

    class FH:
        """
        For space-delimited files, we want different behavior than
        comma or tab.  Generally, we want multiple spaces to be
        treated as a single separator, whereas with comma and tab we
        want multiple commas to return multiple (empty) fields.  The
        join/strip trick below effects this.
        """
        def __init__(self, fh):
            self.fh = fh

        def close(self):
            self.fh.close()

        def seek(self, arg):
            self.fh.seek(arg)

        def fix(self, s):
            return ' '.join(s.split())

        def __next__(self):
            return self.fix(next(self.fh))

        def __iter__(self):
            for line in self.fh:
                yield self.fix(line)

    if delimiter == ' ':
        fh = FH(fh)

    def is_comment(row):
        "Does this parsed row start with the comment marker?"
        return bool(len(row) and comments is not None and
                    row[0].startswith(comments))

    def process_skiprows(reader):
        "Consume the leading *skiprows* rows from *reader*."
        if skiprows:
            for i, row in enumerate(reader):
                if i >= (skiprows - 1):
                    break

    def ismissing(name, val):
        "Should the value val in column name be masked?"
        return val == missing or val == missingd.get(name) or val == ''

    def with_default_value(func, default):
        # Wrap a one-argument converter so that it takes (name, val) and
        # yields *default* for values flagged as missing.
        def newfunc(name, val):
            if ismissing(name, val):
                return default
            else:
                return func(val)
        return newfunc

    def mybool(x):
        if x == 'True':
            return True
        elif x == 'False':
            return False
        else:
            raise ValueError('invalid bool')

    dateparser = dateutil.parser.parse

    def mydateparser(x):
        # try and return a datetime object
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
        return d

    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))

    myfloat = with_default_value(float, np.nan)
    myint = with_default_value(int, -1)
    mystr = with_default_value(str, '')
    mybool = with_default_value(mybool, None)

    def mydate(x):
        # try and return a date object
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

        if d.hour > 0 or d.minute > 0 or d.second > 0:
            raise ValueError('not a date')
        return d.date()
    mydate = with_default_value(mydate, datetime.date(1, 1, 1))

    def get_func(name, item, func):
        # promote functions in this order; start from the converter that
        # worked so far and return the first one that accepts *item*
        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
        for func in funcs[funcs.index(func):]:
            try:
                func(name, item)
            except Exception:
                continue
            return func
        raise ValueError('Could not find a working conversion function')

    # map column names that clash with builtins -- TODO - extend this list
    itemd = {
        'return': 'return_',
        'file': 'file_',
        'print': 'print_',
    }

    def get_converters(reader, comments):
        # Infer one converter per column by scanning rows of *reader*.
        converters = None
        i = 0
        for row in reader:
            if is_comment(row):
                continue
            # Note: with `i > checkrows`, checkrows + 1 rows are inspected.
            if checkrows and i > checkrows:
                break
            # Size the converter list from the first non-empty row.  A
            # leading blank line used to leave it empty, which made the
            # next real row fail with an IndexError below.
            if not converters:
                converters = [mybool] * len(row)
            i += 1

            for j, (name, item) in enumerate(zip(names, row)):
                func = converterd.get(j)
                if func is None:
                    func = converterd.get(name)
                if func is None:
                    func = converters[j]
                    if len(item.strip()):
                        func = get_func(name, item, func)
                else:
                    # how should we handle custom converters and defaults?
                    func = with_default_value(func, None)
                converters[j] = func
        return converters

    needheader = names is None

    finished = False
    try:
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        # Get header and remove invalid characters
        if needheader:
            headers = None
            for row in reader:
                if is_comment(row):
                    continue
                headers = row
                break
            if headers is None:
                # No non-comment row at all: previously an
                # UnboundLocalError on `headers`.
                raise ValueError('Could not find any valid data in CSV file')

            # remove these chars
            delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
            delete.add('"')

            names = []
            seen = dict()
            for i, item in enumerate(headers):
                item = item.strip().lower().replace(' ', '_')
                item = ''.join([c for c in item if c not in delete])
                if not len(item):
                    item = 'column%d' % i

                item = itemd.get(item, item)
                # disambiguate repeated column names with a numeric suffix
                cnt = seen.get(item, 0)
                if cnt > 0:
                    names.append(item + '_%d' % cnt)
                else:
                    names.append(item)
                seen[item] = cnt + 1

        else:
            if isinstance(names, str):
                names = [n.strip() for n in names.split(',')]

        # get the converter functions by inspecting checkrows
        converters = get_converters(reader, comments)
        if converters is None:
            raise ValueError('Could not find any valid data in CSV file')

        # reset the reader and start over
        fh.seek(0)
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        if needheader:
            while True:
                # skip past any comments and consume one line of column
                # header
                row = next(reader)
                if is_comment(row):
                    continue
                break

        # iterate over the remaining rows and convert the data to date
        # objects, ints, or floats as appropriate
        rows = []
        rowmasks = []
        for row in reader:
            if not len(row):
                continue
            if comments is not None and row[0].startswith(comments):
                continue
            # Ensure that the row returned always has the same nr of elements
            row.extend([''] * (len(converters) - len(row)))
            rows.append([func(name, val)
                         for func, name, val in zip(converters, names, row)])
            rowmasks.append([ismissing(name, val)
                             for name, val in zip(names, row)])
        finished = True
    finally:
        # On success the handle is always closed (historical behavior).
        # On failure only close what we opened, so nothing leaks and a
        # caller-supplied handle stays usable.
        if finished or opened:
            fh.close()

    if not len(rows):
        return None

    if use_mrecords and np.any(rowmasks):
        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
    else:
        r = np.rec.fromrecords(rows, names=names)
    return r
2205
-
2206
-
2207
1926
# a series of classes for describing the format intentions of various rec views
2208
1927
@cbook .deprecated ("2.2" )
2209
1928
class FormatObj (object ):
@@ -2484,78 +2203,6 @@ def format(item, just_pad_prec_spacer):
2484
2203
return text
2485
2204
2486
2205
2487
@cbook.deprecated("2.2", alternative='numpy.recarray.tofile')
def rec2csv(r, fname, delimiter=',', formatd=None, missing='',
            missingd=None, withheader=True):
    """
    Save the data from numpy recarray *r* into a
    comma-/space-/tab-delimited file.  The record array dtype names
    will be used for column headers.

    *fname*: can be a filename or a file handle.  Support for gzipped
      files is automatic, if the filename ends in '.gz'

    *withheader*: if withheader is False, do not write the attribute
      names in the first row

    for formatd type FormatFloat, we override the precision to store
    full precision floats in the CSV file

    Raises ValueError if *r* is not 1-dimensional.

    See Also
    --------
    :func:`csv2rec`
        For information about *missing* and *missingd*, which can be used to
        fill in masked values into your CSV file.
    """

    delimiter = str(delimiter)

    if missingd is None:
        missingd = dict()

    def with_mask(func):
        # Wrap a formatter so masked entries are written as their
        # per-column "missing" marker instead of being formatted.
        def newfunc(val, mask, mval):
            if mask:
                return mval
            else:
                return func(val)
        return newfunc

    if r.ndim != 1:
        raise ValueError('rec2csv only operates on 1 dimensional recarrays')

    formatd = get_formatd(r, formatd)
    header = r.dtype.names
    funcs = [with_mask(csvformat_factory(formatd[name]).tostr)
             for name in header]

    # Our list of specials for missing values
    mvals = [missingd.get(name, missing) for name in header]

    # NOTE(review): csv.writer emits str on Python 3; confirm that
    # to_filehandle with flag 'wb' yields a handle that accepts text when
    # *fname* is a path.
    fh, opened = cbook.to_filehandle(fname, 'wb', return_opened=True)
    try:
        writer = csv.writer(fh, delimiter=delimiter)
        if withheader:
            writer.writerow(header)

        # Masked record arrays expose a per-row _fieldmask; probe the first
        # row once rather than on every iteration.
        ismasked = False
        if len(r):
            row = r[0]
            ismasked = hasattr(row, '_fieldmask')

        for row in r:
            if ismasked:
                row, rowmask = row.item(), row._fieldmask.item()
            else:
                rowmask = [False] * len(row)
            writer.writerow([func(val, mask, mval) for func, val, mask, mval
                             in zip(funcs, row, rowmask, mvals)])
    finally:
        # Close only handles we opened, even if formatting/writing failed.
        if opened:
            fh.close()
2557
-
2558
-
2559
2206
@cbook .deprecated ('2.2' , alternative = 'scipy.interpolate.griddata' )
2560
2207
def griddata (x , y , z , xi , yi , interp = 'nn' ):
2561
2208
"""
0 commit comments