From a1ce3fecab142c5ad750e5df541fe242d5cfca3e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 20 Jun 2012 15:10:26 +0300 Subject: [PATCH 1/4] Added probability plotting --- pandas/tools/plotting.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 515434edda6b0..475ba317ea3d0 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -202,6 +202,36 @@ def lag_plot(series, ax=None, **kwds): ax.scatter(y1, y2, **kwds) return ax +def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds): + """Probability plot for uni-variate data. + + Parameters: + ----------- + series: Time series + ax: Matplotlib axis object, optional + dist: Distribution name, one supported by scipy + http://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions + sparams: Distribution parameters (location, scale). + kwds: Matplotlib scatter method keyword arguments, optional + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + from scipy.stats import probplot + if ax == None: + ax = plt.gca() + data = series.values + (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=sparams) + ax.scatter(x, y, **kwds) + y1, y2 = ax.get_ylim() + x1, x2 = (y1 - intercept) / slope, (y2 - intercept) / slope + ax.plot([x1, x2], [y1, y2], color='grey') + ax.set_xlabel("Theoretical Quantiles") + ax.set_ylabel("Sample Quantiles") + return ax + def autocorrelation_plot(series, ax=None): """Autocorrelation plot for time series. 
From 72c42ab5d32620cb86e9fdb006a567a7fd522af6 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 20 Jun 2012 15:30:31 +0300 Subject: [PATCH 2/4] Added tests for probability plot --- pandas/tests/test_graphics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 8326445f28cb0..f6843a3f0eb21 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -86,6 +86,13 @@ def test_lag_plot(self): from pandas.tools.plotting import lag_plot _check_plot_works(lag_plot, self.ts) + @slow + def test_probability_plot(self): + from pandas.tools.plotting import probability_plot + _check_plot_works(probability_plot, self.ts) + _check_plot_works(probability_plot, self.ts, marker='+', color='black') + _check_plot_works(probability_plot, self.ts, dist='cauchy', sparams=(1.0, 0.01), marker='+', color='black') + class TestDataFramePlots(unittest.TestCase): @classmethod From 95e6504dedab7a4276a320c6147f5c3378bd3b58 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 20 Jun 2012 15:47:10 +0300 Subject: [PATCH 3/4] Added probability plot documentation --- doc/source/visualization.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 34bd803516468..f2676e6423b2f 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -325,3 +325,28 @@ confidence band. @savefig autocorrelation_plot.png width=6in autocorrelation_plot(data) + +Probability Plot +~~~~~~~~~~~~~~~~ + +Probability plots are used to check whether given data follow some probability +distribution. By default the data are compared with the normal distribution. +The data are plotted against the theoretical distribution in such a way that +if the data follow the distribution the points lie close to a straight line. + +.. 
ipython:: python + + from pandas.tools.plotting import probability_plot + + plt.figure() + + u_data = Series(np.random.random(1000)) + n_data = Series(np.random.randn(1000)) + + @savefig probability_plot_u.png width=6in + probability_plot(u_data, dist='norm', marker='+', color='black') + + plt.figure() + + @savefig probability_plot_n.png width=6in + probability_plot(n_data, dist='norm', marker='+', color='black') \ No newline at end of file From c63438007ae10056316ce837251a170f1204fdc4 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 9 Aug 2012 18:25:08 +0300 Subject: [PATCH 4/4] Updated probability_plot --- pandas/tests/test_graphics.py | 2 +- pandas/tools/plotting.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index f6843a3f0eb21..aabcb3d8642cd 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -91,7 +91,7 @@ def test_probability_plot(self): from pandas.tools.plotting import probability_plot _check_plot_works(probability_plot, self.ts) _check_plot_works(probability_plot, self.ts, marker='+', color='black') - _check_plot_works(probability_plot, self.ts, dist='cauchy', sparams=(1.0, 0.01), marker='+', color='black') + _check_plot_works(probability_plot, self.ts, dist='cauchy', distargs=(1.0, 0.01), marker='+', color='black') class TestDataFramePlots(unittest.TestCase): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 475ba317ea3d0..5ac153df71966 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -202,7 +202,7 @@ def lag_plot(series, ax=None, **kwds): ax.scatter(y1, y2, **kwds) return ax -def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds): +def probability_plot(series, ax=None, dist='norm', distargs=(), **kwds): """Probability plot for uni-variate data. 
Parameters: @@ -211,26 +211,26 @@ def probability_plot(series, ax=None, dist='norm', sparams=(), **kwds): ax: Matplotlib axis object, optional dist: Distribution name, one supported by scipy http://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions - sparams: Distribution parameters (location, scale). + distargs: Distribution specific parameters usually location and scale. kwds: Matplotlib scatter method keyword arguments, optional Returns: -------- - ax: Matplotlib axis object + fig: Matplotlib figure object """ import matplotlib.pyplot as plt from scipy.stats import probplot if ax == None: ax = plt.gca() data = series.values - (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=sparams) + (x, y), (slope, intercept, _) = probplot(data, dist=dist, sparams=distargs) ax.scatter(x, y, **kwds) y1, y2 = ax.get_ylim() x1, x2 = (y1 - intercept) / slope, (y2 - intercept) / slope ax.plot([x1, x2], [y1, y2], color='grey') ax.set_xlabel("Theoretical Quantiles") ax.set_ylabel("Sample Quantiles") - return ax + return ax.get_figure() def autocorrelation_plot(series, ax=None): """Autocorrelation plot for time series.