86  | 86  |
87  | 87  | fig, ax = plt.subplots()
88  | 88  | ax.hist(xdata, bins=xbins, density=True, **style)
89  |     | -
    | 89  | +ax.set_ylabel('Probability (per dx)')
    | 90  | +ax.set_xlabel('x bins (dx=0.5)')
90  | 91  |
91  | 92  | # %%
92  | 93  | # This normalization can be a little hard to interpret when just exploring the
93  |     | -# data. The value attached to each bar is divided by the total number of data
94  |     | -# points _and_ the width of the bin, and the values _integrate_ to one when
95  |     | -# integrating across the full range of data.
    | 94  | +# data. The value attached to each bar is divided by the total number of data
    | 95  | +# points _and_ the width of the bin, and thus the values _integrate_ to one
    | 96  | +# when integrating across the full range of data.
    | 97  | +# e.g. (``density = counts / (sum(counts) * np.diff(bins))``),
    | 98  | +# and (``np.sum(density * np.diff(bins)) == 1``).
    | 99  | +#
    | 100 | +# This normalization is how `probability density functions
    | 101 | +# <https://en.wikipedia.org/wiki/Probability_density_function>`_ are
    | 102 | +# defined in statistics. If :math:`X` is a random variable on :math:`x`, then
    | 103 | +# :math:`f_X` is the probability density function if :math:`P[a<X<b] =
    | 104 | +# \int_a^b f_X dx`. Note that if the units of x are Volts (for instance), then
    | 105 | +# the units of :math:`f_X` are :math:`V^{-1}` or probability per change in
    | 106 | +# voltage.
96  | 107 | #
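
As a quick check of the normalization the added comment describes, here is a minimal sketch (not part of the diff; the seed and the dx=0.5 bins are assumptions chosen to match the labels above):

import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup; the example defines rng earlier
xdata = rng.normal(size=1000)
xbins = np.arange(-4, 4, 0.5)          # dx = 0.5, matching the new x label

counts, bins = np.histogram(xdata, bins=xbins)
density, _ = np.histogram(xdata, bins=xbins, density=True)

# density = counts / (sum(counts) * bin width), exactly as the comment states ...
assert np.allclose(density, counts / (counts.sum() * np.diff(bins)))
# ... so the bar values integrate to one across the binned range
assert np.isclose(np.sum(density * np.diff(bins)), 1.0)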
97  | 108 | # The usefulness of this normalization is a little more clear when we draw from
98  | 109 | # a known distribution and try to compare with theory. So, choose 1000 points
99  |     | -# from a normal distribution, and also calculate the known probability density
100 |     | -# function
    | 110 | +# from a `normal distribution
    | 111 | +# <https://en.wikipedia.org/wiki/Normal_distribution>`_, and also calculate the
    | 112 | +# known probability density function:
101 | 113 |
102 | 114 | xdata = rng.normal(size=1000)
103 | 115 | xpdf = np.arange(-4, 4, 0.1)
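
The hunk ends before the ``pdf`` array plotted below is computed; the omitted lines presumably evaluate the closed-form standard normal density, along the lines of this sketch:

import numpy as np

xpdf = np.arange(-4, 4, 0.1)
# standard normal density: f(x) = exp(-x**2 / 2) / sqrt(2 * pi)
pdf = np.exp(-xpdf**2 / 2) / np.sqrt(2 * np.pi)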
118 | 130 |
119 | 131 | ax['True'].hist(xdata, bins=xbins, density=True, histtype='step')
120 | 132 | ax['True'].plot(xpdf, pdf)
121 |     | -ax['True'].set_ylabel('Probability per x')
    | 133 | +ax['True'].set_ylabel('Probability (per dx)')
122 | 134 | ax['True'].set_xlabel('x bins (below -1.25 bins are wider)')
123 | 135 |
124 |     | -
125 | 136 | # %%
126 | 137 | # Using *density* also makes it easier to compare histograms with different bin
127 | 138 | # widths. Note that in order to get the theoretical distribution, we must
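
The sentence above is cut off at the hunk boundary, and this cell's plotting code is not shown. As a rough sketch of the bin-width comparison it describes (the 'False'/'True' mosaic keys follow the label calls below; the two bin widths and the seed are illustrative assumptions):

import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup
xdata = rng.normal(size=1000)

fig, ax = plt.subplot_mosaic([['False', 'True']], layout='constrained')
for dx in (0.5, 0.1):  # two illustrative bin widths
    bins = np.arange(-4, 4, dx)
    ax['False'].hist(xdata, bins=bins, histtype='step', label=f'dx={dx}')
    ax['True'].hist(xdata, bins=bins, histtype='step', density=True, label=f'dx={dx}')
# counts per bin scale with the bin width; densities land on a common scale
ax['True'].legend(fontsize='small')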
143 | 154 | # Labels:
144 | 155 | ax['False'].set_xlabel('x bins')
145 | 156 | ax['False'].set_ylabel('Count per bin')
146 |     | -ax['True'].set_ylabel('Probability per x')
    | 157 | +ax['True'].set_ylabel('Probability (per dx)')
147 | 158 | ax['True'].set_xlabel('x bins')
148 | 159 | ax['True'].legend(fontsize='small')
149 | 160 |
182 | 193 |
183 | 194 | ax['density'].hist(xdata, bins=xbins, histtype='step', density=True)
184 | 195 | ax['density'].hist(xdata2, bins=xbins, histtype='step', density=True)
185 |     | -ax['density'].set_ylabel('Probabilty per x')
    | 196 | +ax['density'].set_ylabel('Probability (per dx)')
186 | 197 | ax['density'].set_title('Density=True')
187 | 198 | ax['density'].set_xlabel('x bins')
188 | 199 |
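
This last hunk overlays two datasets with *density* on, which puts samples of different sizes on a common scale. A sketch of the idea (the size and distribution of xdata2 are assumptions, since its definition is outside the diff):

import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup
xdata = rng.normal(size=1000)
xdata2 = rng.normal(size=100)   # hypothetical smaller second sample
xbins = np.arange(-4, 4, 0.25)  # illustrative bin width

fig, ax = plt.subplot_mosaic([['counts', 'density']], layout='constrained')
for data in (xdata, xdata2):
    ax['counts'].hist(data, bins=xbins, histtype='step')  # heights scale with sample size
    ax['density'].hist(data, bins=xbins, histtype='step', density=True)  # directly comparable
ax['density'].set_ylabel('Probability (per dx)')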