Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 6046297

Browse files
authored
Add dataclasses to perf alert tool and refactor code. (#28889)
* Refactor code with Dataclasses Refactor * Add pipe to add extra line for test_description * Fix lint
1 parent 76fbb8e commit 6046297

6 files changed

Lines changed: 284 additions & 167 deletions

File tree

sdks/python/apache_beam/testing/analyzers/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
#
17+
18+
"""
19+
Performance alert tooling for Apache Beam. No backwards compatibility
20+
guarantees.
21+
"""

sdks/python/apache_beam/testing/analyzers/github_issues_utils.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
from typing import Optional
2222
from typing import Tuple
2323

24-
import pandas as pd
2524
import requests
2625

2726
from apache_beam.testing.analyzers import constants
27+
from apache_beam.testing.analyzers.perf_analysis_utils import MetricContainer
28+
from apache_beam.testing.analyzers.perf_analysis_utils import TestConfigContainer
2829

2930
try:
3031
_GITHUB_TOKEN: Optional[str] = os.environ['GITHUB_TOKEN']
@@ -140,25 +141,18 @@ def add_awaiting_triage_label(issue_number: int):
140141

141142

142143
def get_issue_description(
143-
test_id: str,
144-
test_name: Optional[str],
145-
metric_name: str,
146-
timestamps: List[pd.Timestamp],
147-
metric_values: List,
144+
test_config_container: TestConfigContainer,
145+
metric_container: MetricContainer,
148146
change_point_index: int,
149147
max_results_to_display: int = 5,
150-
test_description: Optional[str] = None,
151148
) -> str:
152149
"""
153150
Args:
154-
metric_name: Metric name used for the Change Point Analysis.
155-
timestamps: Timestamps of the metrics when they were published to the
156-
Database. Timestamps are expected in ascending order.
157-
metric_values: metric values for the previous runs.
158-
change_point_index: Index for the change point. The element in the
159-
index of the metric_values would be the change point.
160-
max_results_to_display: Max number of results to display from the change
161-
point index, in both directions of the change point index.
151+
test_config_container: TestConfigContainer containing test metadata.
152+
metric_container: MetricContainer containing metric data.
153+
change_point_index: Index of the change point in the metric data.
154+
max_results_to_display: Max number of results to display from the change
155+
point index, in both directions of the change point index.
162156
163157
Returns:
164158
str: Description used to fill the GitHub issues description.
@@ -168,25 +162,30 @@ def get_issue_description(
168162

169163
description = []
170164

171-
description.append(_ISSUE_DESCRIPTION_TEMPLATE.format(test_id, metric_name))
165+
description.append(
166+
_ISSUE_DESCRIPTION_TEMPLATE.format(
167+
test_config_container.test_id, test_config_container.metric_name))
172168

173-
if test_name:
174-
description.append(("`test_name:` " + f'{test_name}'))
169+
if test_config_container.test_name:
170+
description.append(("`test_name:` " + f'{test_config_container.test_name}'))
175171

176-
if test_description:
177-
description.append(("`Test description:` " + f'{test_description}'))
172+
if test_config_container.test_description:
173+
description.append(
174+
("`Test description:` " + f'{test_config_container.test_description}'))
178175

179176
description.append('```')
180177

181178
runs_to_display = []
182179
max_timestamp_index = min(
183-
change_point_index + max_results_to_display, len(metric_values) - 1)
180+
change_point_index + max_results_to_display,
181+
len(metric_container.values) - 1)
184182
min_timestamp_index = max(0, change_point_index - max_results_to_display)
185183

186184
# run in reverse to display the most recent runs first.
187185
for i in reversed(range(min_timestamp_index, max_timestamp_index + 1)):
188186
row_template = _METRIC_INFO_TEMPLATE.format(
189-
timestamps[i].ctime(), format(metric_values[i], '.2f'))
187+
metric_container.timestamps[i].ctime(),
188+
format(metric_container.values[i], '.2f'))
190189
if i == change_point_index:
191190
row_template += constants._ANOMALY_MARKER
192191
runs_to_display.append(row_template)

sdks/python/apache_beam/testing/analyzers/perf_analysis.py

Lines changed: 94 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -34,66 +34,103 @@
3434

3535
from apache_beam.testing.analyzers import constants
3636
from apache_beam.testing.analyzers.perf_analysis_utils import BigQueryMetricsFetcher
37+
from apache_beam.testing.analyzers.perf_analysis_utils import ChangePointConfig
3738
from apache_beam.testing.analyzers.perf_analysis_utils import GitHubIssueMetaData
3839
from apache_beam.testing.analyzers.perf_analysis_utils import MetricsFetcher
40+
from apache_beam.testing.analyzers.perf_analysis_utils import TestConfigContainer
3941
from apache_beam.testing.analyzers.perf_analysis_utils import create_performance_alert
4042
from apache_beam.testing.analyzers.perf_analysis_utils import find_latest_change_point_index
4143
from apache_beam.testing.analyzers.perf_analysis_utils import get_existing_issues_data
4244
from apache_beam.testing.analyzers.perf_analysis_utils import is_change_point_in_valid_window
43-
from apache_beam.testing.analyzers.perf_analysis_utils import is_perf_alert
45+
from apache_beam.testing.analyzers.perf_analysis_utils import is_sibling_change_point
4446
from apache_beam.testing.analyzers.perf_analysis_utils import publish_issue_metadata_to_big_query
4547
from apache_beam.testing.analyzers.perf_analysis_utils import read_test_config
46-
from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
48+
49+
50+
def get_test_config_container(
51+
params: Dict[str, Any],
52+
test_id: str,
53+
) -> TestConfigContainer:
54+
"""
55+
Args:
56+
params: Dict containing parameters to run change point analysis.
57+
Returns:
58+
TestConfigContainer object containing test config parameters.
59+
"""
60+
return TestConfigContainer(
61+
project=params['project'],
62+
metrics_dataset=params['metrics_dataset'],
63+
metrics_table=params['metrics_table'],
64+
metric_name=params['metric_name'],
65+
test_id=test_id,
66+
test_description=params['test_description'],
67+
test_name=params.get('test_name', None),
68+
labels=params.get('labels', None),
69+
)
70+
71+
72+
def get_change_point_config(params: Dict[str, Any], ) -> ChangePointConfig:
73+
"""
74+
Args:
75+
params: Dict containing parameters to run change point analysis.
76+
Returns:
77+
ChangePointConfig object containing change point analysis parameters.
78+
"""
79+
return ChangePointConfig(
80+
min_runs_between_change_points=params.get(
81+
'min_runs_between_change_points',
82+
constants._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS),
83+
num_runs_in_change_point_window=params.get(
84+
'num_runs_in_change_point_window',
85+
constants._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW))
4786

4887

4988
def run_change_point_analysis(
50-
params, test_id, big_query_metrics_fetcher: MetricsFetcher):
89+
test_config_container: TestConfigContainer,
90+
big_query_metrics_fetcher: MetricsFetcher,
91+
change_point_config: ChangePointConfig = ChangePointConfig(),
92+
):
5193
"""
5294
Args:
53-
params: Dict containing parameters to run change point analysis.
54-
test_id: Test id for the current test.
95+
test_config_container: TestConfigContainer containing test metadata for
96+
fetching data and running change point analysis.
5597
big_query_metrics_fetcher: BigQuery metrics fetcher used to fetch data for
5698
change point analysis.
99+
change_point_config: ChangePointConfig containing parameters to run
100+
change point analysis.
57101
Returns:
58102
bool indicating if a change point is observed and alerted on GitHub.
59103
"""
60-
logging.info("Running change point analysis for test ID %s" % test_id)
61-
if not validate_config(params.keys()):
62-
raise ValueError(
63-
f"Please make sure all these keys {constants._PERF_TEST_KEYS} "
64-
f"are specified for the {test_id}")
65-
66-
metric_name = params['metric_name']
104+
logging.info(
105+
"Running change point analysis for test ID %s" %
106+
test_config_container.test_id)
67107

68108
# test_name will be used to query a single test from
69109
# multiple tests in a single BQ table. Right now, the default
70110
# assumption is that all the test have an individual BQ table
71111
# but this might not be case for other tests(such as IO tests where
72112
# a single BQ tables stores all the data)
73-
test_name = params.get('test_name', None)
113+
test_name = test_config_container.test_name
74114

75115
min_runs_between_change_points = (
76-
constants._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS)
77-
if 'min_runs_between_change_points' in params:
78-
min_runs_between_change_points = params['min_runs_between_change_points']
116+
change_point_config.min_runs_between_change_points)
79117

80118
num_runs_in_change_point_window = (
81-
constants._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW)
82-
if 'num_runs_in_change_point_window' in params:
83-
num_runs_in_change_point_window = params['num_runs_in_change_point_window']
84-
85-
metric_values, timestamps = big_query_metrics_fetcher.fetch_metric_data(
86-
project=params['project'],
87-
metrics_dataset=params['metrics_dataset'],
88-
metrics_table=params['metrics_table'],
89-
metric_name=params['metric_name'],
90-
test_name=test_name
91-
)
119+
change_point_config.num_runs_in_change_point_window)
120+
121+
metric_container = big_query_metrics_fetcher.fetch_metric_data(
122+
test_config=test_config_container)
123+
metric_container.sort_by_timestamp()
124+
125+
metric_values = metric_container.values
126+
timestamps = metric_container.timestamps
92127

93128
change_point_index = find_latest_change_point_index(
94129
metric_values=metric_values)
95130
if not change_point_index:
96-
logging.info("Change point is not detected for the test ID %s" % test_id)
131+
logging.info(
132+
"Change point is not detected for the test ID %s" %
133+
test_config_container.test_id)
97134
return False
98135
# since timestamps are ordered in ascending order and
99136
# num_runs_in_change_point_window refers to the latest runs,
@@ -107,15 +144,17 @@ def run_change_point_analysis(
107144
'on metric %s. Since the change point run %s '
108145
'lies outside the num_runs_in_change_point_window distance: %s, '
109146
'alert is not raised.' % (
110-
test_id,
111-
metric_name,
147+
test_config_container.test_id,
148+
test_config_container.metric_name,
112149
latest_change_point_run + 1,
113150
num_runs_in_change_point_window))
114151
return False
115152

116-
is_alert = True
153+
is_valid_change_point = True
117154
last_reported_issue_number = None
118-
issue_metadata_table_name = f'{params.get("metrics_table")}_{metric_name}'
155+
issue_metadata_table_name = (
156+
f'{test_config_container.metrics_table}_{test_config_container.metric_name}' # pylint: disable=line-too-long
157+
)
119158
existing_issue_data = get_existing_issues_data(
120159
table_name=issue_metadata_table_name)
121160

@@ -127,37 +166,39 @@ def run_change_point_analysis(
127166
# convert numpy.int64 to int
128167
last_reported_issue_number = last_reported_issue_number.item()
129168

130-
is_alert = is_perf_alert(
169+
is_valid_change_point = is_sibling_change_point(
131170
previous_change_point_timestamps=existing_issue_timestamps,
132171
change_point_index=change_point_index,
133172
timestamps=timestamps,
134-
min_runs_between_change_points=min_runs_between_change_points)
135-
if is_alert:
173+
min_runs_between_change_points=min_runs_between_change_points,
174+
test_id=test_config_container.test_id)
175+
if is_valid_change_point:
136176
issue_number, issue_url = create_performance_alert(
137-
metric_name, test_id, timestamps,
138-
metric_values, change_point_index,
139-
params.get('labels', None),
140-
last_reported_issue_number,
141-
test_description = params.get('test_description', None),
142-
test_name = test_name
177+
test_config_container=test_config_container,
178+
metric_container=metric_container,
179+
change_point_index=change_point_index,
180+
existing_issue_number=last_reported_issue_number,
143181
)
144182

145183
issue_metadata = GitHubIssueMetaData(
146184
issue_timestamp=pd.Timestamp(
147185
datetime.now().replace(tzinfo=timezone.utc)),
148186
# BQ doesn't allow '.' in table name
149-
test_id=test_id.replace('.', '_'),
187+
test_id=test_config_container.test_id.replace('.', '_'),
150188
test_name=test_name or uuid.uuid4().hex,
151-
metric_name=metric_name,
189+
metric_name=test_config_container.metric_name,
152190
change_point=metric_values[change_point_index],
153191
issue_number=issue_number,
154192
issue_url=issue_url,
155-
change_point_timestamp=timestamps[change_point_index])
193+
change_point_timestamp=timestamps[change_point_index],
194+
)
156195

157196
publish_issue_metadata_to_big_query(
158-
issue_metadata=issue_metadata, table_name=issue_metadata_table_name)
159-
160-
return is_alert
197+
issue_metadata=issue_metadata,
198+
table_name=issue_metadata_table_name,
199+
project=test_config_container.project,
200+
)
201+
return is_valid_change_point
161202

162203

163204
def run(
@@ -185,10 +226,13 @@ def run(
185226
tests_config: Dict[str, Dict[str, Any]] = read_test_config(config_file_path)
186227

187228
for test_id, params in tests_config.items():
229+
test_config_container = get_test_config_container(params, test_id=test_id)
230+
change_point_config = get_change_point_config(params)
188231
run_change_point_analysis(
189-
params=params,
190-
test_id=test_id,
191-
big_query_metrics_fetcher=big_query_metrics_fetcher)
232+
test_config_container=test_config_container,
233+
big_query_metrics_fetcher=big_query_metrics_fetcher,
234+
change_point_config=change_point_config,
235+
)
192236

193237

194238
if __name__ == '__main__':

0 commit comments

Comments
 (0)