|
5 | 5 |
|
6 | 6 | This example demonstrates how to efficiently visualize large numbers of time |
7 | 7 | series in a way that could potentially reveal hidden substructure and patterns |
8 | | -that are not immediately obvious. |
| 8 | +that are not immediately obvious, and display them in a visually appealing way. |
| 9 | +
|
| 10 | +In this example, we generate multiple sinusoidal "signal" series that are |
| 11 | +buried under a larger number of random walk "noise/background" series. For an |
| 12 | +unbiased Gaussian random walk with standard deviation of σ, the RMS deviation |
| 13 | +from the origin after n steps is σ*sqrt(n). So in order to keep the sinusoids |
| 14 | +visible on the same scale as the random walks, we scale the amplitude by the |
| 15 | +random walk RMS. In addition, we introduce a small random offset ``phi`` |
| 16 | +to shift the sines left/right, and some additive random noise to shift |
| 17 | +individual data points up/down to make the signal a bit more "realistic" (you |
| 18 | +wouldn't expect a perfect sine wave to appear in your data). |
9 | 19 |
|
10 | 20 | The first plot shows the typical way of visualizing multiple time series by |
11 | | -overlaying them on top of each other with ``plt.plot``. The second and third |
12 | | -plots show how to reinterpret the data as a 2d histogram, with optional |
13 | | -interpolation. |
| 21 | +overlaying them on top of each other with ``plt.plot`` and a small value of |
| 22 | +``alpha``. The second and third plots show how to reinterpret the data as a 2d |
| 23 | +histogram, with optional interpolation between data points, by using |
| 24 | +``np.histogram2d`` and ``plt.pcolormesh``. |
14 | 25 | """ |
15 | 26 | from copy import copy |
16 | 27 | import time |
|
27 | 38 | num_points = 100 |
28 | 39 | SNR = 0.10 # Signal to Noise Ratio |
29 | 40 | x = np.linspace(0, 4 * np.pi, num_points) |
30 | | -# random walk |
| 41 | +# Generate unbiased Gaussian random walks |
31 | 42 | Y = np.cumsum(np.random.randn(num_series, num_points), axis=-1) |
32 | | -# sinusoidal signal |
| 43 | +# Generate sinusoidal signals |
33 | 44 | num_signal = int(round(SNR * num_series)) |
34 | 45 | phi = (np.pi / 8) * np.random.randn(num_signal, 1) # small random offset
35 | 46 | Y[-num_signal:] = ( |
36 | | - np.sqrt(np.arange(num_points))[None, :] |
| 47 | + np.sqrt(np.arange(num_points))[None, :] # random walk RMS scaling factor |
37 | 48 | * (np.sin(x[None, :] - phi) |
38 | | - + 0.05 * np.random.randn(num_signal, num_points)) |
| 49 | + + 0.05 * np.random.randn(num_signal, num_points)) # small random noise |
39 | 50 | ) |
40 | 51 |
|
41 | | -# Plot it using `plot` and a small value of alpha. With this view it is |
| 52 | + |
| 53 | +# Plot series using `plot` and a small value of `alpha`. With this view it is |
42 | 54 | # very difficult to observe the sinusoidal behavior because of how many |
43 | 55 | # overlapping series there are. It also takes a bit of time to run because so |
44 | | -# many individual artists that need to be generated. |
| 56 | +# many individual artists need to be generated. |
45 | 57 | tic = time.time() |
46 | 58 | axes[0].plot(x, Y.T, color="C0", alpha=0.1) |
47 | 59 | toc = time.time() |
48 | | -axes[0].set_title( |
49 | | - r"Standard time series visualization using line plot") |
50 | | -print(f"{toc-tic:.2f} sec. elapsed") # ~0.26 seconds |
| 60 | +axes[0].set_title("Line plot with alpha") |
| 61 | +print(f"{toc-tic:.2f} sec. elapsed") |
51 | 62 |
|
52 | 63 |
|
53 | 64 | # Now we will convert the multiple time series into a histogram. Not only will |
54 | 65 | # the hidden signal be more visible, but it is also a much quicker procedure. |
55 | 66 | tic = time.time() |
56 | | -# linearly interpolate between the points in each time series |
| 67 | +# Linearly interpolate between the points in each time series |
57 | 68 | num_fine = 800 |
58 | 69 | x_fine = np.linspace(x.min(), x.max(), num_fine) |
59 | 70 | y_fine = np.empty((num_series, num_fine), dtype=float) |
|
70 | 81 | cmap.set_bad(cmap(0)) |
71 | 82 | h, xedges, yedges = np.histogram2d(x_fine, y_fine, bins=[400, 100]) |
72 | 83 | axes[1].pcolormesh(xedges, yedges, h.T, cmap=cmap, norm=LogNorm(vmax=1.5e2)) |
73 | | -axes[1].set_title( |
74 | | - r"Alternative time series vis. using 2d histogram and log color scale") |
| 84 | +axes[1].set_title("2d histogram and log color scale") |
75 | 85 |
|
76 | | -# Same thing on linear color scale but with different (more visible) cmap |
77 | | -h, xedges, yedges = np.histogram2d(x_fine, y_fine, bins=[400, 100]) |
| 86 | +# Same data but on linear color scale |
78 | 87 | axes[2].pcolormesh(xedges, yedges, h.T, cmap=cmap, vmax=1.5e2) |
79 | | -axes[2].set_title( |
80 | | - r"Alternative time series vis. using 2d histogram and linear color scale") |
| 88 | +axes[2].set_title("2d histogram and linear color scale") |
81 | 89 | toc = time.time() |
82 | | -# ~0.08 sec for both plots + interpolation |
83 | 90 | print(f"{toc-tic:.2f} sec. elapsed") |
84 | 91 |
|
85 | 92 | plt.show() |
0 commit comments