Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 22dcc31

Browse files
committed
Added outerjoin, leftjoin and rightjoin support to rec_join
svn path=/trunk/matplotlib/; revision=5007
1 parent eac15d2 commit 22dcc31

2 files changed

Lines changed: 69 additions & 12 deletions

File tree

examples/rec_join_demo.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Demonstrate mlab.rec_join with the 'inner', 'outer' and 'leftouter' join types.

Two record arrays that share the 'date' and 'high' columns are built from
the same CSV file, printed, and then joined on those two columns once per
join type so the results can be compared side by side.
"""
import numpy as np
import matplotlib.mlab as mlab


# Load the records, sort them in place, and keep the last ten rows as the
# left-hand table.
r = mlab.csv2rec('data/aapl.csv')
r.sort()
r1 = r[-10:]

# Build the right-hand table: twelve rows whose 'date' and 'high' values come
# from a window of r that only partly overlaps r1 (r[-17:-5] vs. r[-10:]),
# plus a 'marker' column that exists only in r2.
# `object` and `float` are the portable spellings of the platform-specific
# '|O4' dtype string and of the `np.float` alias that newer NumPy dropped.
r2 = np.empty(12, dtype=[('date', object), ('high', float),
                         ('marker', float)])
r2 = r2.view(np.recarray)
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)

# Single-argument print(...) calls run unchanged on Python 2 and Python 3.
print("r1:")
print(mlab.rec2txt(r1))
print("r2:")
print(mlab.rec2txt(r2))

# Fill values handed to rec_join's `defaults` argument, keyed by column name.
# `np.nan` replaces the `np.NaN` alias, which NumPy 2.0 no longer provides.
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

# Join on the shared ('date', 'high') key once per supported join type.
for s in ('inner', 'outer', 'leftouter'):
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))

lib/matplotlib/mlab.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,12 +2044,19 @@ def rec_summarize(r, summaryfuncs):
20442044

20452045
return npy.rec.fromarrays(arrays, names=names)
20462046

2047-
def rec_join(key, r1, r2):
2047+
2048+
def rec_join(key, r1, r2, jointype='inner', defaults=None):
20482049
"""
20492050
join record arrays r1 and r2 on key; key is a tuple of field
20502051
names. if r1 and r2 have equal values on all the keys in the key
20512052
tuple, then their fields will be merged into a new record array
20522053
containing the intersection of the fields of r1 and r2
2054+
2055+
The jointype keyword can be 'inner', 'outer', 'leftouter'.
2056+
To do a rightouter join just reverse r1 and r2.
2057+
2058+
The defaults keyword is a dictionary filled with
2059+
{column_name:default_value} pairs.
20532060
"""
20542061

20552062
for name in key:
@@ -2067,16 +2074,21 @@ def makekey(row):
20672074
r1keys = set(r1d.keys())
20682075
r2keys = set(r2d.keys())
20692076

2070-
keys = r1keys & r2keys
2071-
2072-
r1ind = npy.array([r1d[k] for k in keys])
2073-
r2ind = npy.array([r2d[k] for k in keys])
2077+
common_keys = r1keys & r2keys
20742078

2075-
# Make sure that the output rows have the same relative order as r1
2076-
sortind = r1ind.argsort()
2079+
r1ind = npy.array([r1d[k] for k in common_keys])
2080+
r2ind = npy.array([r2d[k] for k in common_keys])
20772081

2078-
r1 = r1[r1ind[sortind]]
2079-
r2 = r2[r2ind[sortind]]
2082+
common_len = len(common_keys)
2083+
left_len = right_len = 0
2084+
if jointype == "outer" or jointype == "leftouter":
2085+
left_keys = r1keys.difference(r2keys)
2086+
left_ind = npy.array([r1d[k] for k in left_keys])
2087+
left_len = len(left_ind)
2088+
if jointype == "outer":
2089+
right_keys = r2keys.difference(r1keys)
2090+
right_ind = npy.array([r2d[k] for k in right_keys])
2091+
right_len = len(right_ind)
20802092

20812093
r2 = rec_drop_fields(r2, r1.dtype.names)
20822094

@@ -2103,12 +2115,30 @@ def key_desc(name):
21032115
[desc for desc in r2.dtype.descr if desc[0] not in key ] )
21042116

21052117

2106-
newrec = npy.empty(len(r1), dtype=newdtype)
2118+
newrec = npy.empty(common_len + left_len + right_len, dtype=newdtype)
2119+
2120+
if jointype != 'inner' and defaults is not None: # fill in the defaults enmasse
2121+
newrec_fields = newrec.dtype.fields.keys()
2122+
for k, v in defaults.items():
2123+
if k in newrec_fields:
2124+
newrec[k] = v
2125+
21072126
for field in r1.dtype.names:
2108-
newrec[field] = r1[field]
2127+
newrec[field][:common_len] = r1[field][r1ind]
2128+
if jointype == "outer" or jointype == "leftouter":
2129+
newrec[field][common_len:(common_len+left_len)] = r1[field][left_ind]
21092130

21102131
for field in r2.dtype.names:
2111-
newrec[field] = r2[field]
2132+
newrec[field][:common_len] = r2[field][r2ind]
2133+
if jointype == "outer":
2134+
newrec[field][-right_len:] = r2[field][right_ind[right_ind.argsort()]]
2135+
2136+
# sort newrec using the same order as r1
2137+
sort_indices = r1ind.copy()
2138+
if jointype == "outer" or jointype == "leftouter":
2139+
sort_indices = npy.append(sort_indices, left_ind)
2140+
newrec[:(common_len+left_len)] = newrec[sort_indices.argsort()]
2141+
21122142

21132143
return newrec.view(npy.recarray)
21142144

0 commit comments

Comments
 (0)