Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 066823e

Browse files
committed
Rewrite Anscombe's quartet example
1 parent 2eb0023 commit 066823e

File tree

1 file changed

+34
-27
lines changed

1 file changed

+34
-27
lines changed

examples/specialty_plots/anscombe.py

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
"""
22
==================
3-
Anscombe's Quartet
3+
Anscombe's quartet
44
==================
55
6-
"""
7-
"""
8-
Edward Tufte uses this example from Anscombe to show 4 datasets of x
9-
and y that have the same mean, standard deviation, and regression
10-
line, but which are qualitatively different.
6+
`Anscombe's quartet`_ is a group of datasets (x, y) that have the same mean,
7+
standard deviation, and regression line, but which are qualitatively different.
8+
9+
It is often used to illustrate the importance of looking at a set of data
10+
graphically and not only relying on basic statistical properties.
11+
12+
.. _Anscombe's quartet: https://en.wikipedia.org/wiki/Anscombe%27s_quartet
1113
"""
1214

1315
import matplotlib.pyplot as plt
@@ -20,30 +22,35 @@
2022
x4 = [8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8]
2123
y4 = [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]
2224

25+
datasets = {
26+
'I': (x, y1),
27+
'II': (x, y2),
28+
'III': (x, y3),
29+
'IV': (x4, y4)
30+
}
2331

24-
def fit(x):
25-
return 3 + 0.5 * x
26-
27-
28-
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
32+
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(6, 6),
33+
gridspec_kw={'wspace': 0.08, 'hspace': 0.08})
2934
axs[0, 0].set(xlim=(0, 20), ylim=(2, 14))
3035
axs[0, 0].set(xticks=(0, 10, 20), yticks=(4, 8, 12))
3136

32-
xfit = np.array([np.min(x), np.max(x)])
33-
axs[0, 0].plot(x, y1, 'ks', xfit, fit(xfit), 'r-', lw=2)
34-
axs[0, 1].plot(x, y2, 'ks', xfit, fit(xfit), 'r-', lw=2)
35-
axs[1, 0].plot(x, y3, 'ks', xfit, fit(xfit), 'r-', lw=2)
36-
xfit = np.array([np.min(x4), np.max(x4)])
37-
axs[1, 1].plot(x4, y4, 'ks', xfit, fit(xfit), 'r-', lw=2)
38-
39-
for ax, label in zip(axs.flat, ['I', 'II', 'III', 'IV']):
40-
ax.label_outer()
41-
ax.text(3, 12, label, fontsize=20)
42-
43-
# verify the stats
44-
pairs = (x, y1), (x, y2), (x, y3), (x4, y4)
45-
for x, y in pairs:
46-
print('mean=%1.2f, std=%1.2f, r=%1.2f' % (np.mean(y), np.std(y),
47-
np.corrcoef(x, y)[0][1]))
37+
for ax, (label, (x, y)) in zip(axs.flat, datasets.items()):
38+
ax.text(0.1, 0.9, label, fontsize=20, transform=ax.transAxes, va='top')
39+
ax.tick_params(direction='in', top=True, right=True)
40+
ax.plot(x, y, 'o')
41+
42+
# linear regression
43+
p1, p0 = np.polyfit(x, y, deg=1)
44+
x_lin = np.array([np.min(x), np.max(x)])
45+
y_lin = p1 * x_lin + p0
46+
ax.plot(x_lin, y_lin, 'r-', lw=2)
47+
48+
# add text box for the statistics
49+
stats = (f'$\\mu$ = {np.mean(y):.2f}\n'
50+
f'$\\sigma$ = {np.std(y):.2f}\n'
51+
f'$r$ = {np.corrcoef(x, y)[0][1]:.2f}')
52+
bbox = dict(boxstyle='round', fc='blanchedalmond', ec='orange', alpha=0.5)
53+
ax.text(0.95, 0.07, stats, fontsize=9, bbox=bbox,
54+
transform=ax.transAxes, horizontalalignment='right')
4855

4956
plt.show()

0 commit comments

Comments
 (0)