3434
3535from apache_beam .testing .analyzers import constants
3636from apache_beam .testing .analyzers .perf_analysis_utils import BigQueryMetricsFetcher
37+ from apache_beam .testing .analyzers .perf_analysis_utils import ChangePointConfig
3738from apache_beam .testing .analyzers .perf_analysis_utils import GitHubIssueMetaData
3839from apache_beam .testing .analyzers .perf_analysis_utils import MetricsFetcher
40+ from apache_beam .testing .analyzers .perf_analysis_utils import TestConfigContainer
3941from apache_beam .testing .analyzers .perf_analysis_utils import create_performance_alert
4042from apache_beam .testing .analyzers .perf_analysis_utils import find_latest_change_point_index
4143from apache_beam .testing .analyzers .perf_analysis_utils import get_existing_issues_data
4244from apache_beam .testing .analyzers .perf_analysis_utils import is_change_point_in_valid_window
43- from apache_beam .testing .analyzers .perf_analysis_utils import is_perf_alert
45+ from apache_beam .testing .analyzers .perf_analysis_utils import is_sibling_change_point
4446from apache_beam .testing .analyzers .perf_analysis_utils import publish_issue_metadata_to_big_query
4547from apache_beam .testing .analyzers .perf_analysis_utils import read_test_config
46- from apache_beam .testing .analyzers .perf_analysis_utils import validate_config
48+
49+
def get_test_config_container(
    params: Dict[str, Any],
    test_id: str,
) -> TestConfigContainer:
  """Builds the TestConfigContainer for a single test entry.

  Args:
    params: Dict containing parameters to run change point analysis. The
      keys 'project', 'metrics_dataset', 'metrics_table', 'metric_name' and
      'test_description' are read with subscript access; 'test_name' and
      'labels' are optional and fall back to None.
    test_id: Test id for the current test.
  Returns:
    TestConfigContainer object containing test config parameters.
  Raises:
    KeyError: if one of the subscript-accessed keys is absent from params.
  """
  # Looked up in this order so the first missing key is the one reported.
  mandatory_keys = (
      'project',
      'metrics_dataset',
      'metrics_table',
      'metric_name',
      'test_description',
  )
  mandatory_values = {key: params[key] for key in mandatory_keys}

  return TestConfigContainer(
      test_id=test_id,
      test_name=params.get('test_name'),
      labels=params.get('labels'),
      **mandatory_values,
  )
70+
71+
def get_change_point_config(params: Dict[str, Any]) -> ChangePointConfig:
  """Builds the ChangePointConfig for a single test entry.

  Args:
    params: Dict containing parameters to run change point analysis. The
      optional keys 'min_runs_between_change_points' and
      'num_runs_in_change_point_window' override the defaults taken from
      the constants module.
  Returns:
    ChangePointConfig object containing change point analysis parameters.
  """
  runs_between_change_points = params.get(
      'min_runs_between_change_points',
      constants._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS)
  runs_in_window = params.get(
      'num_runs_in_change_point_window',
      constants._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW)

  return ChangePointConfig(
      min_runs_between_change_points=runs_between_change_points,
      num_runs_in_change_point_window=runs_in_window)
4786
4887
4988def run_change_point_analysis (
50- params , test_id , big_query_metrics_fetcher : MetricsFetcher ):
89+ test_config_container : TestConfigContainer ,
90+ big_query_metrics_fetcher : MetricsFetcher ,
91+ change_point_config : ChangePointConfig = ChangePointConfig (),
92+ ):
5193 """
5294 Args:
53- params: Dict containing parameters to run change point analysis.
54- test_id: Test id for the current test .
95+ test_config_container: TestConfigContainer containing test metadata for
96+ fetching data and running change point analysis .
5597 big_query_metrics_fetcher: BigQuery metrics fetcher used to fetch data for
5698 change point analysis.
99+ change_point_config: ChangePointConfig containing parameters to run
100+ change point analysis.
57101 Returns:
58102 bool indicating if a change point is observed and alerted on GitHub.
59103 """
60- logging .info ("Running change point analysis for test ID %s" % test_id )
61- if not validate_config (params .keys ()):
62- raise ValueError (
63- f"Please make sure all these keys { constants ._PERF_TEST_KEYS } "
64- f"are specified for the { test_id } " )
65-
66- metric_name = params ['metric_name' ]
104+ logging .info (
105+ "Running change point analysis for test ID %s" %
106+ test_config_container .test_id )
67107
68108 # test_name is used to select a single test when
69109 # multiple tests share one BQ table. Right now, the default
70110 # assumption is that every test has its own BQ table,
71111 # but this might not be the case for other tests (such as IO tests, where
72112 # a single BQ table stores all the data).
73- test_name = params . get ( ' test_name' , None )
113+ test_name = test_config_container . test_name
74114
75115 min_runs_between_change_points = (
76- constants ._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS )
77- if 'min_runs_between_change_points' in params :
78- min_runs_between_change_points = params ['min_runs_between_change_points' ]
116+ change_point_config .min_runs_between_change_points )
79117
80118 num_runs_in_change_point_window = (
81- constants ._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW )
82- if 'num_runs_in_change_point_window' in params :
83- num_runs_in_change_point_window = params ['num_runs_in_change_point_window' ]
84-
85- metric_values , timestamps = big_query_metrics_fetcher .fetch_metric_data (
86- project = params ['project' ],
87- metrics_dataset = params ['metrics_dataset' ],
88- metrics_table = params ['metrics_table' ],
89- metric_name = params ['metric_name' ],
90- test_name = test_name
91- )
119+ change_point_config .num_runs_in_change_point_window )
120+
121+ metric_container = big_query_metrics_fetcher .fetch_metric_data (
122+ test_config = test_config_container )
123+ metric_container .sort_by_timestamp ()
124+
125+ metric_values = metric_container .values
126+ timestamps = metric_container .timestamps
92127
93128 change_point_index = find_latest_change_point_index (
94129 metric_values = metric_values )
95130 if not change_point_index :
96- logging .info ("Change point is not detected for the test ID %s" % test_id )
131+ logging .info (
132+ "Change point is not detected for the test ID %s" %
133+ test_config_container .test_id )
97134 return False
98135 # since timestamps are ordered in ascending order and
99136 # num_runs_in_change_point_window refers to the latest runs,
@@ -107,15 +144,17 @@ def run_change_point_analysis(
107144 'on metric %s. Since the change point run %s '
108145 'lies outside the num_runs_in_change_point_window distance: %s, '
109146 'alert is not raised.' % (
110- test_id ,
111- metric_name ,
147+ test_config_container . test_id ,
148+ test_config_container . metric_name ,
112149 latest_change_point_run + 1 ,
113150 num_runs_in_change_point_window ))
114151 return False
115152
116- is_alert = True
153+ is_valid_change_point = True
117154 last_reported_issue_number = None
118- issue_metadata_table_name = f'{ params .get ("metrics_table" )} _{ metric_name } '
155+ issue_metadata_table_name = (
156+ f'{ test_config_container .metrics_table } _{ test_config_container .metric_name } ' # pylint: disable=line-too-long
157+ )
119158 existing_issue_data = get_existing_issues_data (
120159 table_name = issue_metadata_table_name )
121160
@@ -127,37 +166,39 @@ def run_change_point_analysis(
127166 # convert numpy.int64 to int
128167 last_reported_issue_number = last_reported_issue_number .item ()
129168
130- is_alert = is_perf_alert (
169+ is_valid_change_point = is_sibling_change_point (
131170 previous_change_point_timestamps = existing_issue_timestamps ,
132171 change_point_index = change_point_index ,
133172 timestamps = timestamps ,
134- min_runs_between_change_points = min_runs_between_change_points )
135- if is_alert :
173+ min_runs_between_change_points = min_runs_between_change_points ,
174+ test_id = test_config_container .test_id )
175+ if is_valid_change_point :
136176 issue_number , issue_url = create_performance_alert (
137- metric_name , test_id , timestamps ,
138- metric_values , change_point_index ,
139- params .get ('labels' , None ),
140- last_reported_issue_number ,
141- test_description = params .get ('test_description' , None ),
142- test_name = test_name
177+ test_config_container = test_config_container ,
178+ metric_container = metric_container ,
179+ change_point_index = change_point_index ,
180+ existing_issue_number = last_reported_issue_number ,
143181 )
144182
145183 issue_metadata = GitHubIssueMetaData (
146184 issue_timestamp = pd .Timestamp (
147185 datetime .now ().replace (tzinfo = timezone .utc )),
148186 # BQ doesn't allow '.' in table name
149- test_id = test_id .replace ('.' , '_' ),
187+ test_id = test_config_container . test_id .replace ('.' , '_' ),
150188 test_name = test_name or uuid .uuid4 ().hex ,
151- metric_name = metric_name ,
189+ metric_name = test_config_container . metric_name ,
152190 change_point = metric_values [change_point_index ],
153191 issue_number = issue_number ,
154192 issue_url = issue_url ,
155- change_point_timestamp = timestamps [change_point_index ])
193+ change_point_timestamp = timestamps [change_point_index ],
194+ )
156195
157196 publish_issue_metadata_to_big_query (
158- issue_metadata = issue_metadata , table_name = issue_metadata_table_name )
159-
160- return is_alert
197+ issue_metadata = issue_metadata ,
198+ table_name = issue_metadata_table_name ,
199+ project = test_config_container .project ,
200+ )
201+ return is_valid_change_point
161202
162203
163204def run (
@@ -185,10 +226,13 @@ def run(
185226 tests_config : Dict [str , Dict [str , Any ]] = read_test_config (config_file_path )
186227
187228 for test_id , params in tests_config .items ():
229+ test_config_container = get_test_config_container (params , test_id = test_id )
230+ change_point_config = get_change_point_config (params )
188231 run_change_point_analysis (
189- params = params ,
190- test_id = test_id ,
191- big_query_metrics_fetcher = big_query_metrics_fetcher )
232+ test_config_container = test_config_container ,
233+ big_query_metrics_fetcher = big_query_metrics_fetcher ,
234+ change_point_config = change_point_config ,
235+ )
192236
193237
194238if __name__ == '__main__' :
0 commit comments