
Commit 49d6efd

[BEAM-8661] Moving runners to have per-module logger (#10097)

* Moving runners to have per-module logger
* Fix lint
* Fixups
* Fix merge issue

1 parent 1386b94 commit 49d6efd

40 files changed

Lines changed: 245 additions & 158 deletions
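
The change applied across all of these files is the same: each module stops calling the root logging module directly and instead creates one module-level logger via logging.getLogger(__name__), conventionally named _LOGGER. A minimal sketch of the pattern as this commit applies it (the module contents below are illustrative, not taken from the diff):

import logging

# The logger takes the module's dotted import path as its name
# (e.g. 'apache_beam.runners.dataflow.dataflow_runner'), so every
# record carries its origin and can be filtered per module.
_LOGGER = logging.getLogger(__name__)


def run_job(job_id):
  # All logging in the module goes through _LOGGER, never logging.info().
  _LOGGER.info('Job %s is starting.', job_id)

Calling logging.info(...) directly routes everything through the root logger (and implicitly calls basicConfig() if no handler is configured), so output from different modules is indistinguishable; a named logger keeps the origin in each record's name field.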

sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline.py

Lines changed: 3 additions & 1 deletion

@@ -31,6 +31,8 @@
 
 SLEEP_TIME_SECS = 1
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class StreamingUserMetricsDoFn(beam.DoFn):
   """Generates user metrics and outputs same element."""
@@ -53,7 +55,7 @@ def process(self, element):
     self.double_message_counter.inc()
     self.msg_len_dist_metric.update(len(text_line))
 
-    logging.debug("Done processing returning element array: '%s'", element)
+    _LOGGER.debug("Done processing returning element array: '%s'", element)
 
     return [element]
 

sdks/python/apache_beam/runners/dataflow/dataflow_exercise_streaming_metrics_pipeline_test.py

Lines changed: 4 additions & 2 deletions

@@ -46,6 +46,8 @@
 
 SLEEP_TIME_SECS = 1
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class ExerciseStreamingMetricsPipelineTest(unittest.TestCase):
 
@@ -79,10 +81,10 @@ def setUp(self):
 
   def _inject_words(self, topic, messages):
     """Inject messages as test data to PubSub."""
-    logging.debug('Injecting messages to topic %s', topic.name)
+    _LOGGER.debug('Injecting messages to topic %s', topic.name)
     for msg in messages:
       self.pub_client.publish(self.input_topic.name, msg.encode('utf-8'))
-    logging.debug('Done. Injecting messages to topic %s', topic.name)
+    _LOGGER.debug('Done. Injecting messages to topic %s', topic.name)
 
   def tearDown(self):
     """Delete all created topics and subs."""

sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py

Lines changed: 4 additions & 2 deletions

@@ -40,6 +40,8 @@
 from apache_beam.options.pipeline_options import GoogleCloudOptions
 from apache_beam.options.pipeline_options import PipelineOptions
 
+_LOGGER = logging.getLogger(__name__)
+
 
 def _get_match(proto, filter_fn):
   """Finds and returns the first element that matches a query.
@@ -280,10 +282,10 @@ def main(argv):
   dataflow_client = apiclient.DataflowApplicationClient(options)
   df_metrics = DataflowMetrics(dataflow_client)
   all_metrics = df_metrics.all_metrics(job_id=flags.job_id)
-  logging.info('Printing all MetricResults for %s in %s',
+  _LOGGER.info('Printing all MetricResults for %s in %s',
                flags.job_id, flags.project)
   for metric_result in all_metrics:
-    logging.info(metric_result)
+    _LOGGER.info(metric_result)
 
 
 if __name__ == '__main__':

sdks/python/apache_beam/runners/dataflow/dataflow_runner.py

Lines changed: 11 additions & 8 deletions

@@ -77,6 +77,9 @@
 __all__ = ['DataflowRunner']
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 class DataflowRunner(PipelineRunner):
   """A runner that creates job graphs and submits them for remote execution.
 
@@ -164,7 +167,7 @@ def rank_error(msg):
     # an initialized 'currentState' field.
     if response.currentState is not None:
       if response.currentState != last_job_state:
-        logging.info('Job %s is in state %s', job_id, response.currentState)
+        _LOGGER.info('Job %s is in state %s', job_id, response.currentState)
         last_job_state = response.currentState
       if str(response.currentState) != 'JOB_STATE_RUNNING':
         # Stop checking for new messages on timeout, explanatory
@@ -210,7 +213,7 @@ def rank_error(msg):
         # Skip empty messages.
         if m.messageImportance is None:
           continue
-        logging.info(message)
+        _LOGGER.info(message)
         if str(m.messageImportance) == 'JOB_MESSAGE_ERROR':
           if rank_error(m.messageText) >= last_error_rank:
             last_error_rank = rank_error(m.messageText)
@@ -221,7 +224,7 @@ def rank_error(msg):
     if duration:
       passed_secs = time.time() - start_secs
       if passed_secs > duration_secs:
-        logging.warning('Timing out on waiting for job %s after %d seconds',
+        _LOGGER.warning('Timing out on waiting for job %s after %d seconds',
                         job_id, passed_secs)
         break
 
@@ -453,7 +456,7 @@ def run_pipeline(self, pipeline, options):
     dataflow_worker_jar = getattr(worker_options, 'dataflow_worker_jar', None)
     if dataflow_worker_jar is not None:
       if not apiclient._use_fnapi(options):
-        logging.warning(
+        _LOGGER.warning(
             'Typical end users should not use this worker jar feature. '
             'It can only be used when FnAPI is enabled.')
       else:
@@ -1031,12 +1034,12 @@ def run_Read(self, transform_node, options):
         }
       except error.RuntimeValueProviderError:
         # Size estimation is best effort, and this error is by value provider.
-        logging.info(
+        _LOGGER.info(
            'Could not estimate size of source %r due to ' + \
            'RuntimeValueProviderError', transform.source)
       except Exception:  # pylint: disable=broad-except
         # Size estimation is best effort. So we log the error and continue.
-        logging.info(
+        _LOGGER.info(
            'Could not estimate size of source %r due to an exception: %s',
            transform.source, traceback.format_exc())
 
@@ -1446,7 +1449,7 @@ def cancel(self):
     self._update_job()
 
     if self.is_in_terminal_state():
-      logging.warning(
+      _LOGGER.warning(
           'Cancel failed because job %s is already terminated in state %s.',
           self.job_id(), self.state)
     else:
@@ -1455,7 +1458,7 @@ def cancel(self):
       cancel_failed_message = (
           'Failed to cancel job %s, please go to the Developers Console to '
          'cancel it manually.') % self.job_id()
-      logging.error(cancel_failed_message)
+      _LOGGER.error(cancel_failed_message)
       raise DataflowRuntimeException(cancel_failed_message, self)
 
     return self.state

sdks/python/apache_beam/runners/dataflow/internal/apiclient.py

Lines changed: 14 additions & 12 deletions

@@ -67,6 +67,8 @@
 _LEGACY_ENVIRONMENT_MAJOR_VERSION = '7'
 _FNAPI_ENVIRONMENT_MAJOR_VERSION = '7'
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class Step(object):
   """Wrapper for a dataflow Step protobuf."""
@@ -378,7 +380,7 @@ def __init__(self, options, proto_pipeline):
           'Missing required configuration parameters: %s' % missing)
 
     if not self.google_cloud_options.staging_location:
-      logging.info('Defaulting to the temp_location as staging_location: %s',
+      _LOGGER.info('Defaulting to the temp_location as staging_location: %s',
                    self.google_cloud_options.temp_location)
       (self.google_cloud_options
        .staging_location) = self.google_cloud_options.temp_location
@@ -495,7 +497,7 @@ def stage_file(self, gcs_or_local_path, file_name, stream,
     """Stages a file at a GCS or local path with stream-supplied contents."""
     if not gcs_or_local_path.startswith('gs://'):
       local_path = FileSystems.join(gcs_or_local_path, file_name)
-      logging.info('Staging file locally to %s', local_path)
+      _LOGGER.info('Staging file locally to %s', local_path)
       with open(local_path, 'wb') as f:
         f.write(stream.read())
       return
@@ -505,7 +507,7 @@ def stage_file(self, gcs_or_local_path, file_name, stream,
     request = storage.StorageObjectsInsertRequest(
         bucket=bucket, name=name)
     start_time = time.time()
-    logging.info('Starting GCS upload to %s...', gcs_location)
+    _LOGGER.info('Starting GCS upload to %s...', gcs_location)
     upload = storage.Upload(stream, mime_type)
     try:
       response = self._storage_client.objects.Insert(request, upload=upload)
@@ -520,7 +522,7 @@ def stage_file(self, gcs_or_local_path, file_name, stream,
              'access to the specified path.') %
             (gcs_or_local_path, reportable_errors[e.status_code]))
       raise
-    logging.info('Completed GCS upload to %s in %s seconds.', gcs_location,
+    _LOGGER.info('Completed GCS upload to %s in %s seconds.', gcs_location,
                  int(time.time() - start_time))
     return response
 
@@ -544,7 +546,7 @@ def create_job(self, job):
     if not template_location:
       return self.submit_job_description(job)
 
-    logging.info('A template was just created at location %s',
+    _LOGGER.info('A template was just created at location %s',
                  template_location)
     return None
 
@@ -564,7 +566,7 @@ def create_job_description(self, job):
             shared_names.STAGED_PIPELINE_FILENAME),
         packages=resources, options=job.options,
         environment_version=self.environment_version).proto
-    logging.debug('JOB: %s', job)
+    _LOGGER.debug('JOB: %s', job)
 
   @retry.with_exponential_backoff(num_retries=3, initial_delay_secs=3)
   def get_job_metrics(self, job_id):
@@ -575,7 +577,7 @@ def get_job_metrics(self, job_id):
     try:
       response = self._client.projects_locations_jobs.GetMetrics(request)
     except exceptions.BadStatusCodeError as e:
-      logging.error('HTTP status %d. Unable to query metrics',
+      _LOGGER.error('HTTP status %d. Unable to query metrics',
                     e.response.status)
       raise
     return response
@@ -591,16 +593,16 @@ def submit_job_description(self, job):
     try:
      response = self._client.projects_locations_jobs.Create(request)
     except exceptions.BadStatusCodeError as e:
-      logging.error('HTTP status %d trying to create job'
+      _LOGGER.error('HTTP status %d trying to create job'
                     ' at dataflow service endpoint %s',
                     e.response.status,
                     self.google_cloud_options.dataflow_endpoint)
-      logging.fatal('details of server error: %s', e)
+      _LOGGER.fatal('details of server error: %s', e)
       raise
-    logging.info('Create job: %s', response)
+    _LOGGER.info('Create job: %s', response)
     # The response is a Job proto with the id for the new job.
-    logging.info('Created job with id: [%s]', response.id)
-    logging.info(
+    _LOGGER.info('Created job with id: [%s]', response.id)
+    _LOGGER.info(
         'To access the Dataflow monitoring console, please navigate to '
        'https://console.cloud.google.com/dataflow/jobsDetail'
        '/locations/%s/jobs/%s?project=%s',

sdks/python/apache_beam/runners/dataflow/native_io/iobase.py

Lines changed: 3 additions & 1 deletion

@@ -30,6 +30,8 @@
 from apache_beam.transforms import ptransform
 from apache_beam.transforms.display import HasDisplayData
 
+_LOGGER = logging.getLogger(__name__)
+
 
 def _dict_printable_fields(dict_object, skip_fields):
   """Returns a list of strings for the interesting fields of a dict."""
@@ -135,7 +137,7 @@ def request_dynamic_split(self, dynamic_split_request):
       or a 'DynamicSplitResult' describing how the input was split into a
       primary and residual part.
     """
-    logging.debug(
+    _LOGGER.debug(
         'SourceReader %r does not support dynamic splitting. Ignoring dynamic '
         'split request: %r',
         self, dynamic_split_request)

sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py

Lines changed: 3 additions & 1 deletion

@@ -36,6 +36,8 @@
 # pool.
 WAIT_IN_STATE_TIMEOUT = 10 * 60
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class TestDataflowRunner(DataflowRunner):
   def run_pipeline(self, pipeline, options):
@@ -60,7 +62,7 @@ def run_pipeline(self, pipeline, options):
       self.wait_until_in_state(PipelineState.RUNNING)
 
       if is_streaming and not wait_duration:
-        logging.warning('Waiting indefinitely for streaming job.')
+        _LOGGER.warning('Waiting indefinitely for streaming job.')
       self.result.wait_until_finish(duration=wait_duration)
 
       if on_success_matcher:

sdks/python/apache_beam/runners/direct/direct_runner.py

Lines changed: 5 additions & 2 deletions

@@ -60,6 +60,9 @@
     'SwitchingDirectRunner']
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 class SwitchingDirectRunner(PipelineRunner):
   """Executes a single pipeline on the local machine.
 
@@ -376,7 +379,7 @@ def visit_transform(self, applied_ptransform):
     pipeline.visit(visitor)
     clock = TestClock() if visitor.uses_test_stream else RealClock()
 
-    logging.info('Running pipeline with DirectRunner.')
+    _LOGGER.info('Running pipeline with DirectRunner.')
     self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
     pipeline.visit(self.consumer_tracking_visitor)
 
@@ -418,7 +421,7 @@ def __init__(self, executor, evaluation_context):
 
   def __del__(self):
     if self._state == PipelineState.RUNNING:
-      logging.warning(
+      _LOGGER.warning(
          'The DirectPipelineResult is being garbage-collected while the '
          'DirectRunner is still running the corresponding pipeline. This may '
          'lead to incomplete execution of the pipeline if the main thread '

sdks/python/apache_beam/runners/direct/executor.py

Lines changed: 6 additions & 4 deletions

@@ -37,6 +37,8 @@
 from apache_beam.transforms import sideinputs
 from apache_beam.utils import counters
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class _ExecutorService(object):
   """Thread pool for executing tasks in parallel."""
@@ -344,11 +346,11 @@ def call(self, state_sampler):
           break
         except Exception as e:
           self._retry_count += 1
-          logging.error(
+          _LOGGER.error(
               'Exception at bundle %r, due to an exception.\n %s',
               self._input_bundle, traceback.format_exc())
           if self._retry_count == self._max_retries_per_bundle:
-            logging.error('Giving up after %s attempts.',
+            _LOGGER.error('Giving up after %s attempts.',
                           self._max_retries_per_bundle)
             self._completion_callback.handle_exception(self, e)
 
@@ -566,7 +568,7 @@ def call(self, state_sampler):
                 update.unprocessed_bundle)
           else:
             assert update.exception
-            logging.warning('A task failed with exception: %s',
+            _LOGGER.warning('A task failed with exception: %s',
                             update.exception)
             self._executor.visible_updates.offer(
                 _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(
@@ -576,7 +578,7 @@ def call(self, state_sampler):
               self._executor.executor_service)
           self._add_work_if_necessary(self._fire_timers())
       except Exception as e:  # pylint: disable=broad-except
-        logging.error('Monitor task died due to exception.\n %s', e)
+        _LOGGER.error('Monitor task died due to exception.\n %s', e)
         self._executor.visible_updates.offer(
             _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(
                 sys.exc_info()))

sdks/python/apache_beam/runners/direct/transform_evaluator.py

Lines changed: 3 additions & 1 deletion

@@ -65,6 +65,8 @@
 from apache_beam.utils.timestamp import MIN_TIMESTAMP
 from apache_beam.utils.timestamp import Timestamp
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class TransformEvaluatorRegistry(object):
   """For internal use only; no backwards-compatibility guarantees.
@@ -606,7 +608,7 @@ def start_bundle(self):
 
   def process_timer(self, timer_firing):
     if timer_firing.name not in self.user_timer_map:
-      logging.warning('Unknown timer fired: %s', timer_firing)
+      _LOGGER.warning('Unknown timer fired: %s', timer_firing)
     timer_spec = self.user_timer_map[timer_firing.name]
     self.runner.process_user_timer(
         timer_spec, self.key_coder.decode(timer_firing.encoded_key),

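With per-module loggers in place, verbosity can be tuned per package instead of only globally. A hypothetical configuration making use of the loggers introduced in this diff (the module paths are real Beam packages; the chosen levels are arbitrary):

import logging

# Root configuration applies everywhere by default.
logging.basicConfig(level=logging.INFO)

# Child loggers inherit a package logger's level unless they set their
# own, so one line adjusts all of a runner's modules at once.
logging.getLogger('apache_beam.runners.dataflow').setLevel(logging.DEBUG)
logging.getLogger('apache_beam.runners.direct.executor').setLevel(logging.ERROR)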