|
2660 | 2660 | <pre><code class="python language-python hljs"><span class="hljs-meta">>>> </span>sr = Series([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], index=[<span class="hljs-string">'x'</span>, <span class="hljs-string">'y'</span>])
|
2661 | 2661 | x <span class="hljs-number">1</span>
|
2662 | 2662 | y <span class="hljs-number">2</span>
|
2663 |
| -dtype: int64 |
2664 | 2663 | </code></pre>
|
2665 | 2664 | <pre><code class="python hljs">┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
|
2666 | 2665 | ┃ │ <span class="hljs-string">'sum'</span> │ [<span class="hljs-string">'sum'</span>] │ {<span class="hljs-string">'s'</span>: <span class="hljs-string">'sum'</span>} ┃
|
2667 | 2666 | ┠─────────────┼─────────────┼─────────────┼───────────────┨
|
2668 |
| -┃ sr.apply(…) │ <span class="hljs-number">3</span> │ sum <span class="hljs-number">3</span> │ s <span class="hljs-number">3</span> ┃ |
| 2667 | +┃ sr.apply(…) │ <span class="hljs-number">3</span> │ sum <span class="hljs-number">3</span> │ s <span class="hljs-number">3</span> ┃ |
2669 | 2668 | ┃ sr.agg(…) │ │ │ ┃
|
2670 | 2669 | ┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
|
2671 | 2670 | </code></pre>
|
|
2677 | 2676 | ┃ sr.trans(…) │ y <span class="hljs-number">2</span> │ y <span class="hljs-number">2</span> │ y <span class="hljs-number">2</span> ┃
|
2678 | 2677 | ┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
|
2679 | 2678 | </code></pre>
|
| 2679 | +<ul> |
| 2680 | +<li><strong>The last result has a hierarchical index. <code class="python hljs"><span class="hljs-string">'<Sr>[<key_1>, <key_2>]'</span></code> returns the value stored under that pair of keys.</strong></li> |
| 2681 | +</ul> |
2680 | 2682 | <div><h3 id="dataframe">DataFrame</h3><p><strong>Table with labeled rows and columns.</strong></p><pre><code class="python language-python hljs"><span class="hljs-meta">>>> </span>DataFrame([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]], index=[<span class="hljs-string">'a'</span>, <span class="hljs-string">'b'</span>], columns=[<span class="hljs-string">'x'</span>, <span class="hljs-string">'y'</span>])
|
2681 | 2683 | x y
|
2682 | 2684 | a <span class="hljs-number">1</span> <span class="hljs-number">2</span>
|
|
2839 | 2841 | continents = pd.read_csv(<span class="hljs-string">'https://datahub.io/JohnSnowLabs/country-and-continent-codes-'</span> + \
|
2840 | 2842 | <span class="hljs-string">'list/r/country-and-continent-codes-list-csv.csv'</span>,
|
2841 | 2843 | usecols=[<span class="hljs-string">'Three_Letter_Country_Code'</span>, <span class="hljs-string">'Continent_Name'</span>])
|
2842 |
| -merged = pd.merge(covid, continents, left_on=<span class="hljs-string">'iso_code'</span>, right_on=<span class="hljs-string">'Three_Letter_Country_Code'</span>) |
2843 |
| -summed = merged.groupby([<span class="hljs-string">'Continent_Name'</span>, <span class="hljs-string">'date'</span>]).sum().reset_index() |
2844 |
| -summed[<span class="hljs-string">'Total Deaths per Million'</span>] = summed.total_deaths * <span class="hljs-number">1e6</span> / summed.population |
2845 |
| -summed = summed[(<span class="hljs-string">'2020-03-14'</span> < summed.date) & (summed.date < <span class="hljs-string">'2020-06-25'</span>)] |
2846 |
| -summed = summed.rename({<span class="hljs-string">'date'</span>: <span class="hljs-string">'Date'</span>, <span class="hljs-string">'Continent_Name'</span>: <span class="hljs-string">'Continent'</span>}, axis=<span class="hljs-string">'columns'</span>) |
2847 |
| -plotly.express.line(summed, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">'Total Deaths per Million'</span>, color=<span class="hljs-string">'Continent'</span>).show() |
| 2844 | +df = pd.merge(covid, continents, left_on=<span class="hljs-string">'iso_code'</span>, right_on=<span class="hljs-string">'Three_Letter_Country_Code'</span>) |
| 2845 | +df = df.groupby([<span class="hljs-string">'Continent_Name'</span>, <span class="hljs-string">'date'</span>]).sum().reset_index() |
| 2846 | +df[<span class="hljs-string">'Total Deaths per Million'</span>] = df.total_deaths * <span class="hljs-number">1e6</span> / df.population |
| 2847 | +df = df[(<span class="hljs-string">'2020-03-14'</span> < df.date) & (df.date < <span class="hljs-string">'2020-06-25'</span>)] |
| 2848 | +df = df.rename({<span class="hljs-string">'date'</span>: <span class="hljs-string">'Date'</span>, <span class="hljs-string">'Continent_Name'</span>: <span class="hljs-string">'Continent'</span>}, axis=<span class="hljs-string">'columns'</span>) |
| 2849 | +plotly.express.line(df, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">'Total Deaths per Million'</span>, color=<span class="hljs-string">'Continent'</span>).show() |
2848 | 2850 | </code></pre></div></div>
|
2849 | 2851 |
|
2850 | 2852 |
|
|
2857 | 2859 | display_data(wrangle_data(*scrape_data()))
|
2858 | 2860 |
|
2859 | 2861 | <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_data</span><span class="hljs-params">()</span>:</span>
|
2860 |
| - <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_yah</span><span class="hljs-params">(id_)</span>:</span> |
| 2862 | + <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_yahoo</span><span class="hljs-params">(id_)</span>:</span> |
2861 | 2863 | BASE_URL = <span class="hljs-string">'https://query1.finance.yahoo.com/v7/finance/download/'</span>
|
2862 | 2864 | now = int(datetime.datetime.now().timestamp())
|
2863 | 2865 | url = <span class="hljs-string">f'<span class="hljs-subst">{BASE_URL}</span><span class="hljs-subst">{id_}</span>?period1=1579651200&period2=<span class="hljs-subst">{now}</span>&interval=1d&events=history'</span>
|
2864 | 2866 | <span class="hljs-keyword">return</span> pandas.read_csv(url, usecols=[<span class="hljs-string">'Date'</span>, <span class="hljs-string">'Close'</span>]).set_index(<span class="hljs-string">'Date'</span>).Close
|
2865 | 2867 | covid = pd.read_csv(<span class="hljs-string">'https://covid.ourworldindata.org/data/owid-covid-data.csv'</span>,
|
2866 | 2868 | usecols=[<span class="hljs-string">'date'</span>, <span class="hljs-string">'total_cases'</span>])
|
2867 | 2869 | covid = covid.groupby(<span class="hljs-string">'date'</span>).sum()
|
2868 |
| - dow_jones, gold, bitcoin = scrape_yah(<span class="hljs-string">'^DJI'</span>), scrape_yah(<span class="hljs-string">'GC=F'</span>), scrape_yah(<span class="hljs-string">'BTC-USD'</span>) |
2869 |
| - dow_jones.name, gold.name, bitcoin.name = <span class="hljs-string">'Dow Jones'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Bitcoin'</span> |
2870 |
| - <span class="hljs-keyword">return</span> covid, dow_jones, gold, bitcoin |
2871 |
| - |
2872 |
| -<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">wrangle_data</span><span class="hljs-params">(covid, dow_jones, gold, bitcoin)</span>:</span> |
2873 |
| - out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=<span class="hljs-number">1</span>) |
2874 |
| - out = out.loc[<span class="hljs-string">'2020-02-23'</span>:].iloc[:<span class="hljs-number">-2</span>] |
2875 |
| - out = out.interpolate() |
2876 |
| - out.iloc[:, <span class="hljs-number">1</span>:] = out.rolling(<span class="hljs-number">10</span>, min_periods=<span class="hljs-number">1</span>, center=<span class="hljs-keyword">True</span>).mean().iloc[:, <span class="hljs-number">1</span>:] |
2877 |
| - out.iloc[:, <span class="hljs-number">1</span>:] = out.iloc[:, <span class="hljs-number">1</span>:] / out.iloc[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>:] * <span class="hljs-number">100</span> |
2878 |
| - <span class="hljs-keyword">return</span> out |
2879 |
| - |
2880 |
| -<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">display_data</span><span class="hljs-params">(out)</span>:</span> |
| 2870 | + dow, gold, bitcoin = [scrape_yahoo(id_) <span class="hljs-keyword">for</span> id_ <span class="hljs-keyword">in</span> (<span class="hljs-string">'^DJI'</span>, <span class="hljs-string">'GC=F'</span>, <span class="hljs-string">'BTC-USD'</span>)] |
| 2871 | + dow.name, gold.name, bitcoin.name = <span class="hljs-string">'Dow Jones'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Bitcoin'</span> |
| 2872 | + <span class="hljs-keyword">return</span> covid, dow, gold, bitcoin |
| 2873 | + |
| 2874 | +<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">wrangle_data</span><span class="hljs-params">(covid, dow, gold, bitcoin)</span>:</span> |
| 2875 | + df = pandas.concat([covid, dow, gold, bitcoin], axis=<span class="hljs-number">1</span>) |
| 2876 | + df = df.loc[<span class="hljs-string">'2020-02-23'</span>:].iloc[:<span class="hljs-number">-2</span>] |
| 2877 | + df = df.interpolate() |
| 2878 | + df.iloc[:, <span class="hljs-number">1</span>:] = df.rolling(<span class="hljs-number">10</span>, min_periods=<span class="hljs-number">1</span>, center=<span class="hljs-keyword">True</span>).mean().iloc[:, <span class="hljs-number">1</span>:] |
| 2879 | + df.iloc[:, <span class="hljs-number">1</span>:] = df.iloc[:, <span class="hljs-number">1</span>:] / df.iloc[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>:] * <span class="hljs-number">100</span> |
| 2880 | + <span class="hljs-keyword">return</span> df |
| 2881 | + |
| 2882 | +<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">display_data</span><span class="hljs-params">(df)</span>:</span> |
2881 | 2883 | <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">get_trace</span><span class="hljs-params">(col_name)</span>:</span>
|
2882 |
| - <span class="hljs-keyword">return</span> go.Scatter(x=out.index, y=out[col_name], name=col_name, yaxis=<span class="hljs-string">'y2'</span>) |
2883 |
| - traces = [get_trace(col_name) <span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> out.columns[<span class="hljs-number">1</span>:]] |
2884 |
| - traces.append(go.Scatter(x=out.index, y=out.total_cases, name=<span class="hljs-string">'Total Cases'</span>, yaxis=<span class="hljs-string">'y1'</span>)) |
| 2884 | + <span class="hljs-keyword">return</span> go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=<span class="hljs-string">'y2'</span>) |
| 2885 | + traces = [get_trace(col_name) <span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> df.columns[<span class="hljs-number">1</span>:]] |
| 2886 | + traces.append(go.Scatter(x=df.index, y=df.total_cases, name=<span class="hljs-string">'Total Cases'</span>, yaxis=<span class="hljs-string">'y1'</span>)) |
2885 | 2887 | figure = go.Figure()
|
2886 | 2888 | figure.add_traces(traces)
|
2887 | 2889 | figure.update_layout(
|
|
0 commit comments