|
45 | 45 | this efficiently by caching the direct FFTs. |
46 | 46 |
|
47 | 47 | = record array helper functions = |
48 | | - * rec2txt : pretty print a record array |
49 | 48 | * rec2txt : pretty print a record array |
50 | 49 | * rec2csv : store record array in CSV file |
51 | 50 | * csv2rec : import record array from CSV file with type inspection |
@@ -2114,139 +2113,6 @@ def key_desc(name): |
2114 | 2113 | return newrec.view(npy.recarray) |
2115 | 2114 |
|
2116 | 2115 |
|
2117 | | -def rec_groupby(r, groupby, stats): |
2118 | | - """ |
2119 | | - r is a numpy record array |
2120 | | -
|
2121 | | - groupby is a sequence of record array attribute names that |
2122 | | - together form the grouping key. eg ('date', 'productcode') |
2123 | | -
|
2124 | | - stats is a sequence of (attr, func, outname); for each group, x = |
2125 | | - func(r[attr]) is computed over that group's rows and assigned to the |
2126 | | - output record array under the attribute outname. |
2127 | | - E.g., stats = ( ('sales', len, 'numsales'), ('sales', npy.mean, 'avgsale') ) |
2128 | | -
|
2129 | | - return record array has dtype names for each attribute name in |
2130 | | - the 'groupby' argument, with the associated group values, and |
2131 | | - for each outname in the stats argument, with the associated |
2132 | | - stat summary output |
2133 | | - """ |
2134 | | - # build a dictionary from groupby keys-> list of indices into r with |
2135 | | - # those keys |
2136 | | - rowd = dict() |
2137 | | - for i, row in enumerate(r): |
2138 | | - key = tuple([row[attr] for attr in groupby]) |
2139 | | - rowd.setdefault(key, []).append(i) |
2140 | | - |
2141 | | - # sort the output by groupby keys |
2142 | | - keys = rowd.keys() |
2143 | | - keys.sort() |
2144 | | - |
2145 | | - rows = [] |
2146 | | - for key in keys: |
2147 | | - row = list(key) |
2148 | | - # get the indices for this groupby key |
2149 | | - ind = rowd[key] |
2150 | | - thisr = r[ind] |
2151 | | - # call each stat function for this groupby slice |
2152 | | - row.extend([func(thisr[attr]) for attr, func, outname in stats]) |
2153 | | - rows.append(row) |
2154 | | - |
2155 | | - # build the output record array with groupby and outname attributes |
2156 | | - attrs, funcs, outnames = zip(*stats) |
2157 | | - names = list(groupby) |
2158 | | - names.extend(outnames) |
2159 | | - return npy.rec.fromrecords(rows, names=names) |
2160 | | - |
2161 | | - |
2162 | | - |
2163 | | -def rec_summarize(r, summaryfuncs): |
2164 | | - """ |
2165 | | - r is a numpy record array |
2166 | | -
|
2167 | | - summaryfuncs is a list of (attr, func, outname) which will |
2168 | | - apply func to the array r[attr] and assign the output |
2169 | | - to a new attribute name outname. The returned record array is |
2170 | | - identical to r, with extra arrays for each element in summaryfuncs |
2171 | | - """ |
2172 | | - |
2173 | | - names = list(r.dtype.names) |
2174 | | - arrays = [r[name] for name in names] |
2175 | | - |
2176 | | - for attr, func, outname in summaryfuncs: |
2177 | | - names.append(outname) |
2178 | | - arrays.append(npy.asarray(func(r[attr]))) |
2179 | | - |
2180 | | - return npy.rec.fromarrays(arrays, names=names) |
2181 | | - |
2182 | | -def rec_join(key, r1, r2): |
2183 | | - """ |
2184 | | - join record arrays r1 and r2 on key; key is a tuple of field |
2185 | | - names. Rows are kept only where r1 and r2 have equal values on all |
2186 | | - the fields in the key tuple; their fields are merged into a new |
2187 | | - record array containing the union of the fields of r1 and r2 |
2188 | | - """ |
2189 | | - |
2190 | | - for name in key: |
2191 | | - if name not in r1.dtype.names: |
2192 | | - raise ValueError('r1 does not have key field %s'%name) |
2193 | | - if name not in r2.dtype.names: |
2194 | | - raise ValueError('r2 does not have key field %s'%name) |
2195 | | - |
2196 | | - def makekey(row): |
2197 | | - return tuple([row[name] for name in key]) |
2198 | | - |
2199 | | - r1d = dict([(makekey(row),i) for i,row in enumerate(r1)]) |
2200 | | - r2d = dict([(makekey(row),i) for i,row in enumerate(r2)]) |
2201 | | - |
2202 | | - r1keys = set(r1d.keys()) |
2203 | | - r2keys = set(r2d.keys()) |
2204 | | - |
2205 | | - keys = r1keys & r2keys |
2206 | | - |
2207 | | - r1ind = npy.array([r1d[k] for k in keys]) |
2208 | | - r2ind = npy.array([r2d[k] for k in keys]) |
2209 | | - |
2210 | | - # Make sure that the output rows have the same relative order as r1 |
2211 | | - sortind = r1ind.argsort() |
2212 | | - |
2213 | | - r1 = r1[r1ind[sortind]] |
2214 | | - r2 = r2[r2ind[sortind]] |
2215 | | - |
2216 | | - r2 = rec_drop_fields(r2, r1.dtype.names) |
2217 | | - |
2218 | | - |
2219 | | - def key_desc(name): |
2220 | | - 'if name is a string key, use the larger size of r1 or r2 before merging' |
2221 | | - dt1 = r1.dtype[name] |
2222 | | - if dt1.type != npy.string_: |
2223 | | - return (name, dt1.descr[0][1]) |
2224 | | - |
2225 | | - dt2 = r1.dtype[name] |
2226 | | - assert dt2==dt1 |
2227 | | - if dt1.num>dt2.num: |
2228 | | - return (name, dt1.descr[0][1]) |
2229 | | - else: |
2230 | | - return (name, dt2.descr[0][1]) |
2231 | | - |
2232 | | - |
2233 | | - |
2234 | | - keydesc = [key_desc(name) for name in key] |
2235 | | - |
2236 | | - newdtype = npy.dtype(keydesc + |
2237 | | - [desc for desc in r1.dtype.descr if desc[0] not in key ] + |
2238 | | - [desc for desc in r2.dtype.descr if desc[0] not in key ] ) |
2239 | | - |
2240 | | - |
2241 | | - newrec = npy.empty(len(r1), dtype=newdtype) |
2242 | | - for field in r1.dtype.names: |
2243 | | - newrec[field] = r1[field] |
2244 | | - |
2245 | | - for field in r2.dtype.names: |
2246 | | - newrec[field] = r2[field] |
2247 | | - |
2248 | | - return newrec.view(npy.recarray) |
2249 | | - |
2250 | 2116 | def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', |
2251 | 2117 | converterd=None, names=None, missing=None): |
2252 | 2118 | """ |
@@ -2633,6 +2499,7 @@ def format(item, just_pad_prec_spacer): |
2633 | 2499 | return text |
2634 | 2500 |
|
2635 | 2501 |
|
| 2502 | + |
2636 | 2503 | def rec2csv(r, fname, delimiter=',', formatd=None): |
2637 | 2504 | """ |
2638 | 2505 | Save the data from numpy record array r into a comma/space/tab |
|
0 commit comments