|
53 | 53 | Apply a window along a given axis |
54 | 54 | """ |
55 | 55 |
|
56 | | -import csv |
57 | 56 | import functools |
58 | 57 | from numbers import Number |
59 | 58 |
|
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning, |
985 | 984 | return Cxy, f |
986 | 985 |
|
987 | 986 |
|
988 | | -def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', |
989 | | - converterd=None, names=None, missing='', missingd=None, |
990 | | - use_mrecords=False, dayfirst=False, yearfirst=False): |
991 | | - """ |
992 | | - Load data from comma/space/tab delimited file in *fname* into a |
993 | | - numpy record array and return the record array. |
994 | | -
|
995 | | - If *names* is *None*, a header row is required to automatically |
996 | | - assign the recarray names. The headers will be lower cased, |
997 | | - spaces will be converted to underscores, and illegal attribute |
998 | | - name characters removed. If *names* is not *None*, it is a |
999 | | - sequence of names to use for the column names. In this case, it |
1000 | | - is assumed there is no header row. |
1001 | | -
|
1002 | | -
|
1003 | | - - *fname*: can be a filename or a file handle. Support for gzipped |
1004 | | - files is automatic, if the filename ends in '.gz' |
1005 | | -
|
1006 | | - - *comments*: the character used to indicate the start of a comment |
1007 | | - in the file, or *None* to switch off the removal of comments |
1008 | | -
|
1009 | | - - *skiprows*: is the number of rows from the top to skip |
1010 | | -
|
1011 | | - - *checkrows*: is the number of rows to check to validate the column |
1012 | | - data type. When set to zero all rows are validated. |
1013 | | -
|
1014 | | - - *converterd*: if not *None*, is a dictionary mapping column number or |
1015 | | - munged column name to a converter function. |
1016 | | -
|
1017 | | - - *names*: if not None, is a list of header names. In this case, no |
1018 | | - header will be read from the file |
1019 | | -
|
1020 | | - - *missingd*: a dictionary mapping munged column names to field values |
1021 | | - which signify that the field does not contain actual data and should |
1022 | | - be masked, e.g., '0000-00-00' or 'unused' |
1023 | | -
|
1024 | | - - *missing*: a string whose value signals a missing field regardless of |
1025 | | - the column it appears in |
1026 | | -
|
1027 | | - - *use_mrecords*: if True, return an mrecords.fromrecords record array if |
1028 | | - any of the data are missing |
1029 | | -
|
1030 | | - - *dayfirst*: default is False so that MM-DD-YY has precedence over |
1031 | | - DD-MM-YY. See |
1032 | | - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 |
1033 | | - for further information. |
1034 | | -
|
1035 | | - - *yearfirst*: default is False so that MM-DD-YY has precedence over |
1036 | | - YY-MM-DD. See |
1037 | | - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 |
1038 | | - for further information. |
1039 | | -
|
1040 | | - If no non-blank data rows are found, *None* is returned (a ValueError is raised if there are no non-comment rows at all). |
1041 | | - """ |
1042 | | - |
1043 | | - if converterd is None: |
1044 | | - converterd = dict() |
1045 | | - |
1046 | | - if missingd is None: |
1047 | | - missingd = {} |
1048 | | - |
1049 | | - import dateutil.parser |
1050 | | - import datetime |
1051 | | - |
1052 | | - fh = cbook.to_filehandle(fname) |
1053 | | - |
1054 | | - delimiter = str(delimiter) |
1055 | | - |
1056 | | - class FH: |
1057 | | - """ |
1058 | | - For space-delimited files, we want different behavior than |
1059 | | - comma or tab. Generally, we want multiple spaces to be |
1060 | | - treated as a single separator, whereas with comma and tab we |
1061 | | - want multiple commas to return multiple (empty) fields. The |
1062 | | - join/split trick below effects this. |
1063 | | - """ |
1064 | | - def __init__(self, fh): |
1065 | | - self.fh = fh |
1066 | | - |
1067 | | - def close(self): |
1068 | | - self.fh.close() |
1069 | | - |
1070 | | - def seek(self, arg): |
1071 | | - self.fh.seek(arg) |
1072 | | - |
1073 | | - def fix(self, s): |
1074 | | - return ' '.join(s.split()) |
1075 | | - |
1076 | | - def __next__(self): |
1077 | | - return self.fix(next(self.fh)) |
1078 | | - |
1079 | | - def __iter__(self): |
1080 | | - for line in self.fh: |
1081 | | - yield self.fix(line) |
1082 | | - |
1083 | | - if delimiter == ' ': |
1084 | | - fh = FH(fh) |
1085 | | - |
1086 | | - reader = csv.reader(fh, delimiter=delimiter) |
1087 | | - |
1088 | | - def process_skiprows(reader): |
1089 | | - if skiprows: |
1090 | | - for i, row in enumerate(reader): |
1091 | | - if i >= (skiprows-1): |
1092 | | - break |
1093 | | - |
1094 | | - return fh, reader |
1095 | | - |
1096 | | - process_skiprows(reader) |
1097 | | - |
1098 | | - def ismissing(name, val): |
1099 | | - """Return whether the value val in column name should be masked.""" |
1100 | | - return val == missing or val == missingd.get(name) or val == '' |
1101 | | - |
1102 | | - def with_default_value(func, default): |
1103 | | - def newfunc(name, val): |
1104 | | - if ismissing(name, val): |
1105 | | - return default |
1106 | | - else: |
1107 | | - return func(val) |
1108 | | - return newfunc |
1109 | | - |
1110 | | - def mybool(x): |
1111 | | - if x == 'True': |
1112 | | - return True |
1113 | | - elif x == 'False': |
1114 | | - return False |
1115 | | - else: |
1116 | | - raise ValueError('invalid bool') |
1117 | | - |
1118 | | - dateparser = dateutil.parser.parse |
1119 | | - |
1120 | | - def mydateparser(x): |
1121 | | - # try and return a datetime object |
1122 | | - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) |
1123 | | - return d |
1124 | | - |
1125 | | - mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1)) |
1126 | | - |
1127 | | - myfloat = with_default_value(float, np.nan) |
1128 | | - myint = with_default_value(int, -1) |
1129 | | - mystr = with_default_value(str, '') |
1130 | | - mybool = with_default_value(mybool, None) |
1131 | | - |
1132 | | - def mydate(x): |
1133 | | - # try and return a date object |
1134 | | - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) |
1135 | | - |
1136 | | - if d.hour > 0 or d.minute > 0 or d.second > 0: |
1137 | | - raise ValueError('not a date') |
1138 | | - return d.date() |
1139 | | - mydate = with_default_value(mydate, datetime.date(1, 1, 1)) |
1140 | | - |
1141 | | - def get_func(name, item, func): |
1142 | | - # promote functions in this order |
1143 | | - funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr] |
1144 | | - for func in funcs[funcs.index(func):]: |
1145 | | - try: |
1146 | | - func(name, item) |
1147 | | - except Exception: |
1148 | | - continue |
1149 | | - return func |
1150 | | - raise ValueError('Could not find a working conversion function') |
1151 | | - |
1152 | | - # map column names that clash with Python keywords or builtins -- TODO: extend this list |
1153 | | - itemd = { |
1154 | | - 'return': 'return_', |
1155 | | - 'file': 'file_', |
1156 | | - 'print': 'print_', |
1157 | | - } |
1158 | | - |
1159 | | - def get_converters(reader, comments): |
1160 | | - |
1161 | | - converters = None |
1162 | | - i = 0 |
1163 | | - for row in reader: |
1164 | | - if (len(row) and comments is not None and |
1165 | | - row[0].startswith(comments)): |
1166 | | - continue |
1167 | | - if i == 0: |
1168 | | - converters = [mybool]*len(row) |
1169 | | - if checkrows and i > checkrows: |
1170 | | - break |
1171 | | - i += 1 |
1172 | | - |
1173 | | - for j, (name, item) in enumerate(zip(names, row)): |
1174 | | - func = converterd.get(j) |
1175 | | - if func is None: |
1176 | | - func = converterd.get(name) |
1177 | | - if func is None: |
1178 | | - func = converters[j] |
1179 | | - if len(item.strip()): |
1180 | | - func = get_func(name, item, func) |
1181 | | - else: |
1182 | | - # how should we handle custom converters and defaults? |
1183 | | - func = with_default_value(func, None) |
1184 | | - converters[j] = func |
1185 | | - return converters |
1186 | | - |
1187 | | - # Get header and remove invalid characters |
1188 | | - needheader = names is None |
1189 | | - |
1190 | | - if needheader: |
1191 | | - for row in reader: |
1192 | | - if (len(row) and comments is not None and |
1193 | | - row[0].startswith(comments)): |
1194 | | - continue |
1195 | | - headers = row |
1196 | | - break |
1197 | | - |
1198 | | - # remove these chars |
1199 | | - delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") |
1200 | | - delete.add('"') |
1201 | | - |
1202 | | - names = [] |
1203 | | - seen = dict() |
1204 | | - for i, item in enumerate(headers): |
1205 | | - item = item.strip().lower().replace(' ', '_') |
1206 | | - item = ''.join([c for c in item if c not in delete]) |
1207 | | - if not len(item): |
1208 | | - item = 'column%d' % i |
1209 | | - |
1210 | | - item = itemd.get(item, item) |
1211 | | - cnt = seen.get(item, 0) |
1212 | | - if cnt > 0: |
1213 | | - names.append(item + '_%d' % cnt) |
1214 | | - else: |
1215 | | - names.append(item) |
1216 | | - seen[item] = cnt+1 |
1217 | | - |
1218 | | - else: |
1219 | | - if isinstance(names, str): |
1220 | | - names = [n.strip() for n in names.split(',')] |
1221 | | - |
1222 | | - # get the converter functions by inspecting checkrows |
1223 | | - converters = get_converters(reader, comments) |
1224 | | - if converters is None: |
1225 | | - raise ValueError('Could not find any valid data in CSV file') |
1226 | | - |
1227 | | - # reset the reader and start over |
1228 | | - fh.seek(0) |
1229 | | - reader = csv.reader(fh, delimiter=delimiter) |
1230 | | - process_skiprows(reader) |
1231 | | - |
1232 | | - if needheader: |
1233 | | - while True: |
1234 | | - # skip past any comments and consume one line of column header |
1235 | | - row = next(reader) |
1236 | | - if (len(row) and comments is not None and |
1237 | | - row[0].startswith(comments)): |
1238 | | - continue |
1239 | | - break |
1240 | | - |
1241 | | - # iterate over the remaining rows and convert the data to bools, ints, |
1242 | | - # floats, dates, datetimes, or strings as appropriate |
1243 | | - rows = [] |
1244 | | - rowmasks = [] |
1245 | | - for i, row in enumerate(reader): |
1246 | | - if not len(row): |
1247 | | - continue |
1248 | | - if comments is not None and row[0].startswith(comments): |
1249 | | - continue |
1250 | | - # Ensure that the row returned always has the same number of elements |
1251 | | - row.extend([''] * (len(converters) - len(row))) |
1252 | | - rows.append([func(name, val) |
1253 | | - for func, name, val in zip(converters, names, row)]) |
1254 | | - rowmasks.append([ismissing(name, val) |
1255 | | - for name, val in zip(names, row)]) |
1256 | | - fh.close() |
1257 | | - |
1258 | | - if not len(rows): |
1259 | | - return None |
1260 | | - |
1261 | | - if use_mrecords and np.any(rowmasks): |
1262 | | - r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks) |
1263 | | - else: |
1264 | | - r = np.rec.fromrecords(rows, names=names) |
1265 | | - return r |
1266 | | - |
1267 | | - |
1268 | 987 | class GaussianKDE: |
1269 | 988 | """ |
1270 | 989 | Representation of a kernel-density estimate using Gaussian kernels. |
|
0 commit comments