@@ -2044,12 +2044,19 @@ def rec_summarize(r, summaryfuncs):
20442044
20452045 return npy .rec .fromarrays (arrays , names = names )
20462046
2047- def rec_join (key , r1 , r2 ):
2047+
2048+ def rec_join (key , r1 , r2 , jointype = 'inner' , defaults = None ):
20482049 """
20492050 join record arrays r1 and r2 on key; key is a tuple of field
20502051 names. if r1 and r2 have equal values on all the keys in the key
20512052 tuple, then their fields will be merged into a new record array
20522053 containing the intersection of the fields of r1 and r2
2054+
2055+ The jointype keyword can be 'inner', 'outer', 'leftouter'.
2056+ To do a rightouter join just reverse r1 and r2.
2057+
2058+ The defaults keyword is a dictionary filled with
2059+ {column_name:default_value} pairs.
20532060 """
20542061
20552062 for name in key :
@@ -2067,16 +2074,21 @@ def makekey(row):
20672074 r1keys = set (r1d .keys ())
20682075 r2keys = set (r2d .keys ())
20692076
2070- keys = r1keys & r2keys
2071-
2072- r1ind = npy .array ([r1d [k ] for k in keys ])
2073- r2ind = npy .array ([r2d [k ] for k in keys ])
2077+ common_keys = r1keys & r2keys
20742078
2075- # Make sure that the output rows have the same relative order as r1
2076- sortind = r1ind . argsort ( )
2079+ r1ind = npy . array ([ r1d [ k ] for k in common_keys ])
2080+ r2ind = npy . array ([ r2d [ k ] for k in common_keys ] )
20772081
2078- r1 = r1 [r1ind [sortind ]]
2079- r2 = r2 [r2ind [sortind ]]
2082+ common_len = len (common_keys )
2083+ left_len = right_len = 0
2084+ if jointype == "outer" or jointype == "leftouter" :
2085+ left_keys = r1keys .difference (r2keys )
2086+ left_ind = npy .array ([r1d [k ] for k in left_keys ])
2087+ left_len = len (left_ind )
2088+ if jointype == "outer" :
2089+ right_keys = r2keys .difference (r1keys )
2090+ right_ind = npy .array ([r2d [k ] for k in right_keys ])
2091+ right_len = len (right_ind )
20802092
20812093 r2 = rec_drop_fields (r2 , r1 .dtype .names )
20822094
@@ -2103,12 +2115,30 @@ def key_desc(name):
21032115 [desc for desc in r2 .dtype .descr if desc [0 ] not in key ] )
21042116
21052117
2106- newrec = npy .empty (len (r1 ), dtype = newdtype )
2118+ newrec = npy .empty (common_len + left_len + right_len , dtype = newdtype )
2119+
2120+ if jointype != 'inner' and defaults is not None : # fill in the defaults enmasse
2121+ newrec_fields = newrec .dtype .fields .keys ()
2122+ for k , v in defaults .items ():
2123+ if k in newrec_fields :
2124+ newrec [k ] = v
2125+
21072126 for field in r1 .dtype .names :
2108- newrec [field ] = r1 [field ]
2127+ newrec [field ][:common_len ] = r1 [field ][r1ind ]
2128+ if jointype == "outer" or jointype == "leftouter" :
2129+ newrec [field ][common_len :(common_len + left_len )] = r1 [field ][left_ind ]
21092130
21102131 for field in r2 .dtype .names :
2111- newrec [field ] = r2 [field ]
2132+ newrec [field ][:common_len ] = r2 [field ][r2ind ]
2133+ if jointype == "outer" :
2134+ newrec [field ][- right_len :] = r2 [field ][right_ind [right_ind .argsort ()]]
2135+
2136+ # sort newrec using the same order as r1
2137+ sort_indices = r1ind .copy ()
2138+ if jointype == "outer" or jointype == "leftouter" :
2139+ sort_indices = npy .append (sort_indices , left_ind )
2140+ newrec [:(common_len + left_len )] = newrec [sort_indices .argsort ()]
2141+
21122142
21132143 return newrec .view (npy .recarray )
21142144
0 commit comments