Added a recs_join function to join a single column of multiple record arrays

sameerd · sameerd · commit 2909ee310ca2 · 2009-09-11T20:58:27.000Z
svn path=/trunk/matplotlib/; revision=7746
diff --git a/lib/matplotlib/cbook.py b/lib/matplotlib/cbook.py
@@ -1626,6 +1626,47 @@ def quad2cubic(q0x, q0y, q1x, q1y, q2x, q2y):
     import matplotlib.mlab as mlab
     return mlab.quad2cubic(q0x, q0y, q1x, q1y, q2x, q2y)
 
+def align_iterators(func, *iterables):
+    """
+    Merge iterables that are each sorted in ascending order of *func*.
+
+    Yields (key, rows) tuples by ascending key, where key is func(row) and
+    rows holds, per iterable, the row matching key or None if it has none.
+    It is used by mlab.recs_join to join record arrays
+    """
+    class myiter:
+        def __init__(self, it):
+            self.it = iter(it)
+            self.iternext()
+
+        def iternext(self):
+            # Flag exhaustion explicitly so falsy keys (0, '') stay valid.
+            try:
+                self.value = next(self.it)
+                self.key = func(self.value)
+                self.done = False
+            except StopIteration:
+                self.value = self.key = None
+                self.done = True
+
+        def __call__(self, key):
+            retval = None
+            if not self.done and key == self.key:
+                retval = self.value
+                self.iternext()
+            elif not self.done and key > self.key:
+                raise ValueError("Iterator has been left behind")
+            return retval
+
+    # This can be made more efficient by not computing the minimum key for each iteration
+    iters = [myiter(it) for it in iterables]
+    pending = [it.key for it in iters if not it.done]
+    while pending:
+        minkey = min(pending)
+        yield (minkey, [it(minkey) for it in iters])
+        pending = [it.key for it in iters if not it.done]
+
+
 
 if __name__=='__main__':
     assert( allequal([1,1,1]) )
diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py
@@ -91,6 +91,9 @@
 :meth:`rec_join`
     join two record arrays on sequence of fields
 
+:meth:`recs_join`
+    a simple join of multiple recarrays using a single column as a key
+
 :meth:`rec_groupby`
     summarize data by groups (similar to SQL GROUP BY)
 
@@ -139,7 +142,7 @@
 """
 
 from __future__ import division
-import csv, warnings, copy, os
+import csv, warnings, copy, os, operator
 
 import numpy as np
 ma = np.ma
@@ -1880,6 +1883,28 @@ def mapped_r2field(name):
 
     return newrec
 
+def recs_join(key, name, recs, missing=0.):
+    """
+    Join column *name* of several record arrays on the key column *key*.
+
+    *key* is the column name that acts as a key
+    *name* is the name of the column that we want to join
+    *recs* is a list of record arrays to join, each ordered by *key*
+    *missing* is what the missing fields are replaced by
+
+    returns a record array with columns [key, name0, name1, ... name(n-1)]
+
+    >>> r = recs_join("date", "close", recs=[r0, r1], missing=0.)
+    """
+    def extract(r):
+        # align_iterators passes None for arrays that lack the current key
+        return missing if r is None else r[name]
+
+    aligned = cbook.align_iterators(operator.attrgetter(key), *[iter(r) for r in recs])
+    results = [[rowkey] + [extract(r) for r in row] for rowkey, row in aligned]
+    names = ",".join([key] + ["%s%d" % (name, d) for d in range(len(recs))])
+    return np.rec.fromrecords(results, names=names)
+
 
 def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
             converterd=None, names=None, missing='', missingd=None,