From a1ce3fecab142c5ad750e5df541fe242d5cfca3e Mon Sep 17 00:00:00 2001
From: Vytautas Jancauskas <unaudio@gmail.com>
Date: Wed, 20 Jun 2012 15:10:26 +0300
Subject: [PATCH 1/4] Added probability plotting

---
 pandas/tools/plotting.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 515434edda6b0..475ba317ea3d0 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -202,6 +202,36 @@ def lag_plot(series, ax=None, **kwds):
     ax.scatter(y1, y2, **kwds)
     return ax
 
+def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds):
+    """Probability plot for uni-variate data.
+
+    Parameters:
+    -----------
+    series: Time series
+    ax: Matplotlib  axis object, optional
+    dist: Distribution name, one supported by scipy 
+        http://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions
+    sparams: Distribution parameters (location, scale).
+    kwds: Matplotlib scatter method keyword arguments, optional
+
+    Returns:
+    --------
+    ax: Matplotlib axis object
+    """
+    import matplotlib.pyplot as plt
+    from scipy.stats import probplot
+    if ax == None:
+        ax = plt.gca()
+    data = series.values
+    (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=sparams)
+    ax.scatter(x, y, **kwds)
+    y1, y2 = ax.get_ylim()
+    x1, x2 = (y1 - intercept) / slope, (y2 - intercept) / slope
+    ax.plot([x1, x2], [y1, y2], color='grey')
+    ax.set_xlabel("Theoretical Quantiles")
+    ax.set_ylabel("Sample Quantiles")
+    return ax
+
 def autocorrelation_plot(series, ax=None):
     """Autocorrelation plot for time series.
 

From 72c42ab5d32620cb86e9fdb006a567a7fd522af6 Mon Sep 17 00:00:00 2001
From: Vytautas Jancauskas <unaudio@gmail.com>
Date: Wed, 20 Jun 2012 15:30:31 +0300
Subject: [PATCH 2/4] Added tests for probability plot

---
 pandas/tests/test_graphics.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index 8326445f28cb0..f6843a3f0eb21 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -86,6 +86,13 @@ def test_lag_plot(self):
         from pandas.tools.plotting import lag_plot
         _check_plot_works(lag_plot, self.ts)
 
+    @slow
+    def test_probability_plot(self):
+        from pandas.tools.plotting import probability_plot
+        _check_plot_works(probability_plot, self.ts)
+        _check_plot_works(probability_plot, self.ts, marker='+', color='black')
+        _check_plot_works(probability_plot, self.ts, dist='cauchy', sparams=(1.0, 0.01), marker='+', color='black')
+
 class TestDataFramePlots(unittest.TestCase):
 
     @classmethod

From 95e6504dedab7a4276a320c6147f5c3378bd3b58 Mon Sep 17 00:00:00 2001
From: Vytautas Jancauskas <unaudio@gmail.com>
Date: Wed, 20 Jun 2012 15:47:10 +0300
Subject: [PATCH 3/4] Added probability plot documentation

---
 doc/source/visualization.rst | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
index 34bd803516468..f2676e6423b2f 100644
--- a/doc/source/visualization.rst
+++ b/doc/source/visualization.rst
@@ -325,3 +325,28 @@ confidence band.
 
    @savefig autocorrelation_plot.png width=6in
    autocorrelation_plot(data)
+
+Probability Plot
+~~~~~~~~~~~~~~~~
+
+Probability plots are used to check whether given data follow some probability
+distribution. With the default parameters, the data are compared against the
+normal distribution. The data are plotted against the theoretical distribution
+in such a way that, if they follow it, the points fall along a straight line.
+
+.. ipython:: python
+   
+   from pandas.tools.plotting import probability_plot
+
+   plt.figure()
+
+   u_data = Series(np.random.random(1000))
+   n_data = Series(np.random.randn(1000))
+
+   @savefig probability_plot_u.png width=6in
+   probability_plot(u_data, dist='norm', marker='+', color='black')
+
+   plt.figure()
+
+   @savefig probability_plot_n.png width=6in
+   probability_plot(n_data, dist='norm', marker='+', color='black')
\ No newline at end of file

From c63438007ae10056316ce837251a170f1204fdc4 Mon Sep 17 00:00:00 2001
From: Vytautas Jancauskas <unaudio@gmail.com>
Date: Thu, 9 Aug 2012 18:25:08 +0300
Subject: [PATCH 4/4] Updated probability_plot

---
 pandas/tests/test_graphics.py |  2 +-
 pandas/tools/plotting.py      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index f6843a3f0eb21..aabcb3d8642cd 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -91,7 +91,7 @@ def test_probability_plot(self):
         from pandas.tools.plotting import probability_plot
         _check_plot_works(probability_plot, self.ts)
         _check_plot_works(probability_plot, self.ts, marker='+', color='black')
-        _check_plot_works(probability_plot, self.ts, dist='cauchy', sparams=(1.0, 0.01), marker='+', color='black')
+        _check_plot_works(probability_plot, self.ts, dist='cauchy', distargs=(1.0, 0.01), marker='+', color='black')
 
 class TestDataFramePlots(unittest.TestCase):
 
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 475ba317ea3d0..5ac153df71966 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -202,7 +202,7 @@ def lag_plot(series, ax=None, **kwds):
     ax.scatter(y1, y2, **kwds)
     return ax
 
-def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds):
+def probability_plot(series, ax=None, dist='norm', distargs=(), **kwds):
     """Probability plot for uni-variate data.
 
     Parameters:
@@ -211,26 +211,26 @@ def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds):
     ax: Matplotlib  axis object, optional
     dist: Distribution name, one supported by scipy 
         http://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions
-    sparams: Distribution parameters (location, scale).
+    distargs: Distribution-specific parameters, usually location and scale.
     kwds: Matplotlib scatter method keyword arguments, optional
 
     Returns:
     --------
-    ax: Matplotlib axis object
+    fig: Matplotlib figure object
     """
     import matplotlib.pyplot as plt
     from scipy.stats import probplot
     if ax == None:
         ax = plt.gca()
     data = series.values
-    (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=sparams)
+    (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=distargs)
     ax.scatter(x, y, **kwds)
     y1, y2 = ax.get_ylim()
     x1, x2 = (y1 - intercept) / slope, (y2 - intercept) / slope
     ax.plot([x1, x2], [y1, y2], color='grey')
     ax.set_xlabel("Theoretical Quantiles")
     ax.set_ylabel("Sample Quantiles")
-    return ax
+    return ax.get_figure()
 
 def autocorrelation_plot(series, ax=None):
     """Autocorrelation plot for time series.