Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 55693aa

Browse files
author
Takashi Matsuo
authored
[monitoring] testing: mitigate 409 conflicts (GoogleCloudPlatform#3311)
* [monitoring] testing: mitigate 409 conflicts fixes GoogleCloudPlatform#2971 * retry on ServiceUnavailable too * reduce the number of api calls * mark tests as flaky instead of having retries * fix the rerun_filter implementation * add randomness to the sleep calls * longer wait, better teardown * allow both messages
1 parent 8f49b4e commit 55693aa

File tree

2 files changed

+76
-51
lines changed

2 files changed

+76
-51
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
pytest==5.3.2
22
retrying==1.3.3
3+
flaky==3.6.1

monitoring/api/v3/alerts-client/snippets_test.py

Lines changed: 75 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@
1616

1717
import random
1818
import string
19+
import time
1920

2021
from google.api_core.exceptions import Aborted
22+
from google.api_core.exceptions import NotFound
23+
from google.api_core.exceptions import ServiceUnavailable
2124
from google.cloud import monitoring_v3
2225
import google.protobuf.json_format
2326
import pytest
@@ -26,13 +29,25 @@
2629
import snippets
2730

2831

32+
# We assume we have access to a good randomness source.
33+
random.seed()
34+
35+
2936
def random_name(length):
3037
return ''.join(
3138
[random.choice(string.ascii_lowercase) for i in range(length)])
3239

3340

3441
def retry_if_aborted(exception):
35-
return isinstance(exception, Aborted)
42+
return isinstance(exception, (Aborted, ServiceUnavailable))
43+
44+
45+
def delay_on_aborted(err, *args):
46+
if retry_if_aborted(err[1]):
47+
# add randomness for avoiding continuous conflict
48+
time.sleep(5 + (random.randint(0, 9) * 0.1))
49+
return True
50+
return False
3651

3752

3853
class PochanFixture:
@@ -49,7 +64,7 @@ def __init__(self):
4964

5065
def __enter__(self):
5166
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
52-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
67+
stop_max_attempt_number=10, retry_on_exception=retry_if_aborted)
5368
def setup():
5469
# Create a policy.
5570
policy = monitoring_v3.types.alert_pb2.AlertPolicy()
@@ -74,13 +89,20 @@ def setup():
7489
def __exit__(self, type, value, traceback):
7590
# Delete the policy and channel we created.
7691
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
77-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
92+
stop_max_attempt_number=10, retry_on_exception=retry_if_aborted)
7893
def teardown():
79-
self.alert_policy_client.delete_alert_policy(
80-
self.alert_policy.name)
81-
if self.notification_channel.name:
82-
self.notification_channel_client.delete_notification_channel(
83-
self.notification_channel.name)
94+
try:
95+
self.alert_policy_client.delete_alert_policy(
96+
self.alert_policy.name)
97+
except NotFound:
98+
print("Ignored NotFound when deleting a policy.")
99+
try:
100+
if self.notification_channel.name:
101+
self.notification_channel_client\
102+
.delete_notification_channel(
103+
self.notification_channel.name)
104+
except NotFound:
105+
print("Ignored NotFound when deleting a channel.")
84106
teardown()
85107

86108

@@ -96,72 +118,74 @@ def test_list_alert_policies(capsys, pochan):
96118
assert pochan.alert_policy.display_name in out
97119

98120

121+
@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5)
99122
def test_enable_alert_policies(capsys, pochan):
100-
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
101-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
102-
def invoke_sample(val):
103-
snippets.enable_alert_policies(pochan.project_name, val)
104-
105-
invoke_sample(False)
106-
invoke_sample(False)
107-
out, _ = capsys.readouterr()
108-
assert "already disabled" in out
109-
110-
invoke_sample(True)
123+
# These sleep calls are for mitigating the following error:
124+
# "409 Too many concurrent edits to the project configuration.
125+
# Please try again."
126+
# Having multiple projects would make these `sleep()` calls unnecessary.
127+
# See also #3310
128+
time.sleep(2)
129+
snippets.enable_alert_policies(pochan.project_name, True)
111130
out, _ = capsys.readouterr()
112-
assert "Enabled {0}".format(pochan.project_name) in out
131+
assert "Enabled {0}".format(pochan.project_name) in out \
132+
or "{} is already enabled".format(pochan.alert_policy.name) in out
113133

114-
invoke_sample(True)
134+
time.sleep(2)
135+
snippets.enable_alert_policies(pochan.project_name, False)
115136
out, _ = capsys.readouterr()
116-
assert "already enabled" in out
137+
assert "Disabled {}".format(pochan.project_name) in out \
138+
or "{} is already disabled".format(pochan.alert_policy.name) in out
117139

118140

141+
@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5)
119142
def test_replace_channels(capsys, pochan):
120-
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
121-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
122-
def invoke_sample():
123-
alert_policy_id = pochan.alert_policy.name.split('/')[-1]
124-
notification_channel_id = pochan.notification_channel.name.split(
125-
'/')[-1]
126-
snippets.replace_notification_channels(
127-
pochan.project_name, alert_policy_id, [notification_channel_id])
128-
129-
invoke_sample()
143+
alert_policy_id = pochan.alert_policy.name.split('/')[-1]
144+
notification_channel_id = pochan.notification_channel.name.split('/')[-1]
145+
146+
# This sleep call is for mitigating the following error:
147+
# "409 Too many concurrent edits to the project configuration.
148+
# Please try again."
149+
# Having multiple projects would make this `sleep()` call unnecessary.
150+
# See also #3310
151+
time.sleep(2)
152+
snippets.replace_notification_channels(
153+
pochan.project_name, alert_policy_id, [notification_channel_id])
130154
out, _ = capsys.readouterr()
131155
assert "Updated {0}".format(pochan.alert_policy.name) in out
132156

133157

158+
@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5)
134159
def test_backup_and_restore(capsys, pochan):
135-
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
136-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
137-
def invoke_backup():
138-
snippets.backup(pochan.project_name, 'backup.json')
139-
140-
invoke_backup()
160+
# These sleep calls are for mitigating the following error:
161+
# "409 Too many concurrent edits to the project configuration.
162+
# Please try again."
163+
# Having multiple projects would make these `sleep()` calls unnecessary.
164+
# See also #3310
165+
time.sleep(2)
166+
snippets.backup(pochan.project_name, 'backup.json')
141167
out, _ = capsys.readouterr()
142168

143-
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
144-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
145-
def invoke_restore():
146-
snippets.restore(pochan.project_name, 'backup.json')
147-
148-
invoke_restore()
169+
time.sleep(2)
170+
snippets.restore(pochan.project_name, 'backup.json')
149171
out, _ = capsys.readouterr()
150172
assert "Updated {0}".format(pochan.alert_policy.name) in out
151173
assert "Updating channel {0}".format(
152174
pochan.notification_channel.display_name) in out
153175

154176

177+
@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5)
155178
def test_delete_channels(capsys, pochan):
156179
notification_channel_id = pochan.notification_channel.name.split('/')[-1]
157180

158-
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000,
159-
stop_max_attempt_number=5, retry_on_exception=retry_if_aborted)
160-
def invoke_delete():
161-
snippets.delete_notification_channels(
162-
pochan.project_name, [notification_channel_id], force=True)
163-
164-
invoke_delete()
181+
# This sleep call is for mitigating the following error:
182+
# "409 Too many concurrent edits to the project configuration.
183+
# Please try again."
184+
# Having multiple projects would make this `sleep()` call unnecessary.
185+
# See also #3310
186+
time.sleep(2)
187+
snippets.delete_notification_channels(
188+
pochan.project_name, [notification_channel_id], force=True)
165189
out, _ = capsys.readouterr()
166190
assert "{0} deleted".format(notification_channel_id) in out
167191
pochan.notification_channel.name = '' # So teardown is not tried

0 commit comments

Comments
 (0)