Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b2ecae3

Browse files
authored
feat: improve firebase error handling (GoogleCloudPlatform#53)
* add retry for registration * add unit tests for retries * bugfix: data lost when retrying snapshot updates
1 parent 2d5c7e2 commit b2ecae3

File tree

2 files changed

+357
-59
lines changed

2 files changed

+357
-59
lines changed

src/googleclouddebugger/firebase_client.py

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
"""Communicates with Firebase RTDB backend."""
1515

1616
from collections import deque
17+
import copy
1718
import hashlib
1819
import json
1920
import os
2021
import platform
2122
import requests
22-
import socket
2323
import sys
2424
import threading
25+
import time
2526
import traceback
2627

2728
import firebase_admin
@@ -114,6 +115,7 @@ def __init__(self):
114115

115116
# Delay before retrying failed request.
116117
self.register_backoff = backoff.Backoff() # Register debuggee.
118+
self.subscribe_backoff = backoff.Backoff() # Subscribe to updates.
117119
self.update_backoff = backoff.Backoff() # Update breakpoint.
118120

119121
# Maximum number of times that the message is re-transmitted before it
@@ -279,13 +281,25 @@ def _MainThreadProc(self):
279281
self._breakpoint_subscription.
280282
"""
281283
# Note: if self._credentials is None, default app credentials will be used.
282-
# TODO: Error handling.
283-
firebase_admin.initialize_app(self._credentials,
284-
{'databaseURL': self._database_url})
284+
try:
285+
firebase_admin.initialize_app(self._credentials,
286+
{'databaseURL': self._database_url})
287+
except ValueError:
288+
native.LogWarning(
289+
f'Failed to initialize firebase: {traceback.format_exc()}')
290+
native.LogError('Failed to start debugger agent. Giving up.')
291+
return
285292

286-
self._RegisterDebuggee()
293+
registration_required, delay = True, 0
294+
while registration_required:
295+
time.sleep(delay)
296+
registration_required, delay = self._RegisterDebuggee()
287297
self.registration_complete.set()
288-
self._SubscribeToBreakpoints()
298+
299+
subscription_required, delay = True, 0
300+
while subscription_required:
301+
time.sleep(delay)
302+
subscription_required, delay = self._SubscribeToBreakpoints()
289303
self.subscription_complete.set()
290304

291305
def _TransmissionThreadProc(self):
@@ -310,26 +324,29 @@ def _RegisterDebuggee(self):
310324
Returns:
311325
(registration_required, delay) tuple
312326
"""
327+
debuggee = None
313328
try:
314329
debuggee = self._GetDebuggee()
315330
self._debuggee_id = debuggee['id']
316-
317-
try:
318-
debuggee_path = f'cdbg/debuggees/{self._debuggee_id}'
319-
native.LogInfo(
320-
f'registering at {self._database_url}, path: {debuggee_path}')
321-
firebase_admin.db.reference(debuggee_path).set(debuggee)
322-
native.LogInfo(
323-
f'Debuggee registered successfully, ID: {self._debuggee_id}')
324-
self.register_backoff.Succeeded()
325-
return (False, 0) # Proceed immediately to subscribing to breakpoints.
326-
except BaseException:
327-
native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}')
328331
except BaseException:
329-
native.LogWarning('Debuggee information not available: ' +
330-
traceback.format_exc())
332+
native.LogWarning(
333+
f'Debuggee information not available: {traceback.format_exc()}')
334+
return (True, self.register_backoff.Failed())
331335

332-
return (True, self.register_backoff.Failed())
336+
try:
337+
debuggee_path = f'cdbg/debuggees/{self._debuggee_id}'
338+
native.LogInfo(
339+
f'registering at {self._database_url}, path: {debuggee_path}')
340+
firebase_admin.db.reference(debuggee_path).set(debuggee)
341+
native.LogInfo(
342+
f'Debuggee registered successfully, ID: {self._debuggee_id}')
343+
self.register_backoff.Succeeded()
344+
return (False, 0) # Proceed immediately to subscribing to breakpoints.
345+
except BaseException:
346+
# There is no significant benefit to handling different exceptions
347+
# in different ways; we will log and retry regardless.
348+
native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}')
349+
return (True, self.register_backoff.Failed())
333350

334351
def _SubscribeToBreakpoints(self):
335352
# Kill any previous subscriptions first.
@@ -340,7 +357,13 @@ def _SubscribeToBreakpoints(self):
340357
path = f'cdbg/breakpoints/{self._debuggee_id}/active'
341358
native.LogInfo(f'Subscribing to breakpoint updates at {path}')
342359
ref = firebase_admin.db.reference(path)
343-
self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback)
360+
try:
361+
self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback)
362+
return (False, 0)
363+
except firebase_admin.exceptions.FirebaseError:
364+
native.LogInfo(
365+
f'Failed to subscribe to breakpoints: {traceback.format_exc()}')
366+
return (True, self.subscribe_backoff.Failed())
344367

345368
def _ActiveBreakpointCallback(self, event):
346369
if event.event_type == 'put':
@@ -410,7 +433,7 @@ def _TransmitBreakpointUpdates(self):
410433
try:
411434
# Something has changed on the breakpoint.
412435
# It should be going from active to final, but let's make sure.
413-
if not breakpoint_data['isFinalState']:
436+
if not breakpoint_data.get('isFinalState', False):
414437
raise BaseException(
415438
f'Unexpected breakpoint update requested: {breakpoint_data}')
416439

@@ -428,6 +451,7 @@ def _TransmitBreakpointUpdates(self):
428451
f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}')
429452
bp_ref.delete()
430453

454+
summary_data = breakpoint_data
431455
# Save snapshot data for snapshots only.
432456
if is_snapshot:
433457
# Note that there may not be snapshot data.
@@ -436,14 +460,15 @@ def _TransmitBreakpointUpdates(self):
436460
bp_ref.set(breakpoint_data)
437461

438462
# Now strip potential snapshot data.
439-
breakpoint_data.pop('evaluatedExpressions', None)
440-
breakpoint_data.pop('stackFrames', None)
441-
breakpoint_data.pop('variableTable', None)
463+
summary_data = copy.deepcopy(breakpoint_data)
464+
summary_data.pop('evaluatedExpressions', None)
465+
summary_data.pop('stackFrames', None)
466+
summary_data.pop('variableTable', None)
442467

443468
# Then add it to the list of final breakpoints.
444469
bp_ref = firebase_admin.db.reference(
445470
f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}')
446-
bp_ref.set(breakpoint_data)
471+
bp_ref.set(summary_data)
447472

448473
native.LogInfo(f'Breakpoint {bp_id} update transmitted successfully')
449474

@@ -460,15 +485,7 @@ def _TransmitBreakpointUpdates(self):
460485
# This is very common if multiple instances are sending final update
461486
# simultaneously.
462487
native.LogInfo(f'{err}, breakpoint: {bp_id}')
463-
except socket.error as err:
464-
if retry_count < self.max_transmit_attempts - 1:
465-
native.LogInfo(f'Socket error {err.errno} while sending breakpoint '
466-
f'{bp_id} update: {traceback.format_exc()}')
467-
retry_list.append((breakpoint_data, retry_count + 1))
468-
else:
469-
native.LogWarning(f'Breakpoint {bp_id} retry count exceeded maximum')
470-
# Socket errors shouldn't persist like this; reconnect.
471-
#reconnect = True
488+
472489
except BaseException:
473490
native.LogWarning(f'Fatal error sending breakpoint {bp_id} update: '
474491
f'{traceback.format_exc()}')

0 commit comments

Comments
 (0)