86  | 86  |
87  | 87  | fig, ax = plt.subplots()
88  | 88  | ax.hist(xdata, bins=xbins, density=True, **style)
89  |     | -
    | 89  | +ax.set_ylabel('Probability (per dx)')
    | 90  | +ax.set_xlabel('x bins (dx=0.5)')
90  | 91  |
91  | 92  | # %%
92  | 93  | # This normalization can be a little hard to interpret when just exploring the
93  |     | -# data. The value attached to each bar is divided by the total number of data
94  |     | -# points _and_ the width of the bin, and the values _integrate_ to one when
95  |     | -# integrating across the full range of data.
    | 94  | +# data. The value attached to each bar is divided by the total number of data
    | 95  | +# points _and_ the width of the bin, and thus the values _integrate_ to one
    | 96  | +# when integrating across the full range of data.
    | 97  | +# e.g. (``density = counts / (sum(counts) * np.diff(bins))``),
    | 98  | +# and (``np.sum(density * np.diff(bins)) == 1``).
    | 99  | +#
    | 100 | +# This normalization is how `probability density functions
    | 101 | +# <https://en.wikipedia.org/wiki/Probability_density_function>`_ are
    | 102 | +# defined in statistics. If :math:`X` is a random variable on :math:`x`, then
    | 103 | +# :math:`f_X` is the probability density function if :math:`P[a<X<b] =
    | 104 | +# \int_a^b f_X dx`. Note that if the units of x are Volts (for instance), then
    | 105 | +# the units of :math:`f_X` are :math:`V^{-1}` or probability per change in
    | 106 | +# voltage.
96  | 107 | #
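
As a quick check of the normalization the added comment describes, here is a minimal sketch (not part of the diff; the seed and the dx=0.5 bins are assumptions chosen to match the labels above):

import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup; the example defines rng earlier
xdata = rng.normal(size=1000)
xbins = np.arange(-4, 4, 0.5)          # dx = 0.5, matching the new x label

counts, bins = np.histogram(xdata, bins=xbins)
density, _ = np.histogram(xdata, bins=xbins, density=True)

# density = counts / (sum(counts) * bin width), exactly as the comment states ...
assert np.allclose(density, counts / (counts.sum() * np.diff(bins)))
# ... so the bar values integrate to one across the binned range
assert np.isclose(np.sum(density * np.diff(bins)), 1.0)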
97  | 108 | # The usefulness of this normalization is a little more clear when we draw from
98  | 109 | # a known distribution and try to compare with theory. So, choose 1000 points
99  |     | -# from a normal distribution, and also calculate the known probability density
100 |     | -# function
    | 110 | +# from a `normal distribution
    | 111 | +# <https://en.wikipedia.org/wiki/Normal_distribution>`_, and also calculate the
    | 112 | +# known probability density function:
101 | 113 |
102 | 114 | xdata = rng.normal(size=1000)
103 | 115 | xpdf = np.arange(-4, 4, 0.1)
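
The hunk ends before the ``pdf`` array plotted below is computed; the omitted lines presumably evaluate the closed-form standard normal density, along the lines of this sketch:

import numpy as np

xpdf = np.arange(-4, 4, 0.1)
# standard normal density: f(x) = exp(-x**2 / 2) / sqrt(2 * pi)
pdf = np.exp(-xpdf**2 / 2) / np.sqrt(2 * np.pi)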
118 | 130 |
119 | 131 | ax['True'].hist(xdata, bins=xbins, density=True, histtype='step')
120 | 132 | ax['True'].plot(xpdf, pdf)
121 |     | -ax['True'].set_ylabel('Probability per x')
    | 133 | +ax['True'].set_ylabel('Probability (per dx)')
122 | 134 | ax['True'].set_xlabel('x bins (below -1.25 bins are wider)')
123 | 135 |
124 |     | -
125 | 136 | # %%
126 | 137 | # Using *density* also makes it easier to compare histograms with different bin
127 | 138 | # widths. Note that in order to get the theoretical distribution, we must
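
The sentence above is cut off at the hunk boundary, and this cell's plotting code is not shown. As a rough sketch of the bin-width comparison it describes (the 'False'/'True' mosaic keys follow the label calls below; the two bin widths and the seed are illustrative assumptions):

import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup
xdata = rng.normal(size=1000)

fig, ax = plt.subplot_mosaic([['False', 'True']], layout='constrained')
for dx in (0.5, 0.1):  # two illustrative bin widths
    bins = np.arange(-4, 4, dx)
    ax['False'].hist(xdata, bins=bins, histtype='step', label=f'dx={dx}')
    ax['True'].hist(xdata, bins=bins, histtype='step', density=True, label=f'dx={dx}')
# counts per bin scale with the bin width; densities land on a common scale
ax['True'].legend(fontsize='small')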
143 | 154 | # Labels:
144 | 155 | ax['False'].set_xlabel('x bins')
145 | 156 | ax['False'].set_ylabel('Count per bin')
146 |     | -ax['True'].set_ylabel('Probability per x')
    | 157 | +ax['True'].set_ylabel('Probability (per dx)')
147 | 158 | ax['True'].set_xlabel('x bins')
148 | 159 | ax['True'].legend(fontsize='small')
149 | 160 |
182 | 193 |
183 | 194 | ax['density'].hist(xdata, bins=xbins, histtype='step', density=True)
184 | 195 | ax['density'].hist(xdata2, bins=xbins, histtype='step', density=True)
185 |     | -ax['density'].set_ylabel('Probabilty per x')
    | 196 | +ax['density'].set_ylabel('Probability (per dx)')
186 | 197 | ax['density'].set_title('Density=True')
187 | 198 | ax['density'].set_xlabel('x bins')
188 | 199 |
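
This last hunk overlays two datasets with *density* on, which puts samples of different sizes on a common scale. A sketch of the idea (the size and distribution of xdata2 are assumptions, since its definition is outside the diff):

import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(19680801)  # assumed setup
xdata = rng.normal(size=1000)
xdata2 = rng.normal(size=100)   # hypothetical smaller second sample
xbins = np.arange(-4, 4, 0.25)  # illustrative bin width

fig, ax = plt.subplot_mosaic([['counts', 'density']], layout='constrained')
for data in (xdata, xdata2):
    ax['counts'].hist(data, bins=xbins, histtype='step')  # heights scale with sample size
    ax['density'].hist(data, bins=xbins, histtype='step', density=True)  # directly comparable
ax['density'].set_ylabel('Probability (per dx)')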