-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathw_helpers.py
More file actions
153 lines (118 loc) · 3.69 KB
/
w_helpers.py
File metadata and controls
153 lines (118 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import datetime
import pandas as pd
import os
from pathlib import Path
def aggregate_by_month(df, col='T'):
    """Given a data frame of hourly data, compute statistics by month

    Parameters
    ----------
    df : DataFrame
        Must have columns {'year', 'month'}

    col : str, optional
        The column to aggregate.  Defaults to 'T'

    Returns
    -------
    DataFrame
        Indexed on the 15th of the month (as `datetime.date` objects),
        Has columns of `describe` + 'year' and 'month'
    """
    # The grouping keys must be a *list*: pandas interprets a tuple as a
    # single (tuple-valued) column label, not as several keys.
    stats = df.groupby(['year', 'month'])[col].describe()
    # Place each month's statistics at mid-month.
    mid_month = [datetime.date(int(year), int(month), 15)
                 for year, month in stats.index]
    # Turn the (year, month) group keys back into ordinary columns.
    stats = stats.reset_index()
    stats.index = mid_month
    return stats
def aggregate_by_day(df, col='T'):
    """Given a data frame of hourly data, compute statistics by day

    Parameters
    ----------
    df : DataFrame
        Must have columns {'year', 'month', 'day'}

    col : str, optional
        The column to aggregate.  Defaults to 'T'

    Returns
    -------
    DataFrame
        Indexed by day (as `datetime.date` objects).
        Has columns of `describe` + 'year', 'month', and 'day'
    """
    # The grouping keys must be a *list*: pandas interprets a tuple as a
    # single (tuple-valued) column label, not as several keys.
    stats = df.groupby(['year', 'month', 'day'])[col].describe()
    days = [datetime.date(int(year), int(month), int(day))
            for year, month, day in stats.index]
    # Turn the (year, month, day) group keys back into ordinary columns.
    stats = stats.reset_index()
    stats.index = days
    return stats
def extract_month_of_daily(daily, year, month):
    """Given daily values, extract a given month

    Parameters
    ----------
    daily : DataFrame
        Must have columns {'year', 'month'}.  The index entries must
        support subtraction yielding an object with a ``days`` attribute
        (e.g. `datetime.date`).

    year, month : int
        The year and month of interest

    Returns
    -------
    DataFrame
        Indexed on days elapsed since the first selected row.  Has the
        columns of the input plus one extra column holding the original
        index (named 'index' when the input index is unnamed).  The input
        frame is not modified.
    """
    selected = (daily['month'] == month) & (daily['year'] == year)
    subset = daily[selected]
    # Offsets are measured from the first row that matched, which is the
    # start of the month only if that day is present in the data.
    day_offsets = [(stamp - subset.index[0]).days for stamp in subset.index]
    # Non-inplace reset: never mutate a filtered slice of the caller's frame.
    subset = subset.reset_index()
    subset.index = day_offsets
    return subset
def extract_day_of_hourly(hourly_df, year, month, day):
    """Given a data frame with hourly data, extract data for year-month-day

    Parameters
    ----------
    hourly_df : DataFrame
        Must have columns 'year', 'month', 'day' with the expected semantics and
        a time index whose entries can have a naive `datetime.datetime`
        subtracted from them.

    year, month, day : int
        The day to extract the data for

    Returns
    -------
    DataFrame
        The rows of the requested day, same columns as the input, indexed
        by (fractional) hours since midnight of that day.  The input frame
        is not modified.
    """
    selected = ((hourly_df['month'] == month) &
                (hourly_df['year'] == year) &
                (hourly_df['day'] == day))
    # Work on an explicit copy so that replacing the index can never touch
    # the caller's frame.
    day_df = hourly_df[selected].copy()
    midnight = datetime.datetime(year, month, day, 0, 0)
    # total_seconds() keeps the sub-second part of the offset, which the
    # ``seconds`` attribute silently discards.
    day_df.index = [(stamp - midnight).total_seconds() / 3600
                    for stamp in day_df.index]
    return day_df
def label_date(ax, label, date, df):
    """Annotate the 'mean' value of a given date with a labelled arrow

    ``date`` must be present in the index of ``df``.

    Parameters
    ----------
    ax : Axes
        The axes to draw to (its ``annotate`` method is called)

    label : str
        The text of the label

    date : object in index of df
        The x coordinate of the annotated point

    df : DataFrame
        The data source; the y coordinate is read from its 'mean' column

    Returns
    -------
    Whatever ``ax.annotate`` returns
    """
    row = df.loc[date]
    point = (date, row['mean'])
    # Text sits below and to the left of the point, with an arrow to it.
    arrow_style = {'arrowstyle': '->'}
    text_offset = (-10, -30)
    return ax.annotate(
        label,
        point,
        ha='right',
        xytext=text_offset,
        textcoords='offset points',
        arrowprops=arrow_style,
    )
def load_data(dataset):
    """Load data from a given dataset

    Parameters
    ----------
    dataset : str
        Name of the dataset.  The file ``<dataset>.h5`` is looked up in the
        ``data`` directory next to this file.

    Returns
    -------
    DataFrame
        Whatever `pandas.read_hdf` returns for that file.

    Raises
    ------
    RuntimeError
        If the dataset file does not exist.  The message lists the
        datasets that are available.
    """
    data_dir = Path(__file__).resolve().parent / 'data'
    fname = data_dir / f'{dataset}.h5'
    try:
        return pd.read_hdf(str(fname))
    except FileNotFoundError as err:
        # The data directory itself may be missing; do not let listing it
        # raise a second, more confusing FileNotFoundError.
        if data_dir.is_dir():
            sources = sorted(f.stem for f in data_dir.iterdir()
                             if f.is_file() and f.suffix == '.h5')
        else:
            sources = []
        raise RuntimeError(f"Could not find {dataset!r}. Existing "
                           f"datasets are {sources}") from err