# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # pytype: skip-file import re import unittest import hamcrest as hc import pytest import apache_beam as beam from apache_beam import metrics from apache_beam.metrics.cells import DistributionData from apache_beam.metrics.execution import MetricKey from apache_beam.metrics.execution import MetricsContainer from apache_beam.metrics.execution import MetricsEnvironment from apache_beam.metrics.metric import Lineage from apache_beam.metrics.metric import MetricResults from apache_beam.metrics.metric import Metrics from apache_beam.metrics.metric import MetricsFilter from apache_beam.metrics.metricbase import MetricName from apache_beam.runners.direct.direct_runner import BundleBasedDirectRunner from apache_beam.runners.worker import statesampler from apache_beam.testing.metric_result_matchers import DistributionMatcher from apache_beam.testing.metric_result_matchers import MetricResultMatcher from apache_beam.testing.test_pipeline import TestPipeline from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to from apache_beam.utils import counters from apache_beam.utils.histogram import LinearBucket class NameTest(unittest.TestCase): def test_basic_metric_name(self): name = MetricName('namespace1', 'name1') self.assertEqual(name.namespace, 'namespace1') self.assertEqual(name.name, 'name1') self.assertEqual(name, MetricName('namespace1', 'name1')) key = MetricKey('step1', name) self.assertEqual(key.step, 'step1') self.assertEqual(key.metric.namespace, 'namespace1') self.assertEqual(key.metric.name, 'name1') self.assertEqual(key, MetricKey('step1', MetricName('namespace1', 'name1'))) class MetricResultsTest(unittest.TestCase): def test_metric_filter_namespace_matching(self): filter = MetricsFilter().with_namespace('ns1') name = MetricName('ns1', 'name1') key = MetricKey('step1', name) self.assertTrue(MetricResults.matches(filter, key)) def test_metric_filter_name_matching(self): filter = MetricsFilter().with_name('name1').with_namespace('ns1') name = MetricName('ns1', 'name1') key = MetricKey('step1', name) self.assertTrue(MetricResults.matches(filter, key)) filter = MetricsFilter().with_name('name1') name = MetricName('ns1', 'name1') key = MetricKey('step1', name) self.assertTrue(MetricResults.matches(filter, key)) def test_metric_filter_step_matching(self): name = MetricName('ns1', 'name1') filter = MetricsFilter().with_step('Step1') key = MetricKey('Step1', name) self.assertTrue(MetricResults.matches(filter, key)) key = MetricKey('Step10', name) self.assertFalse(MetricResults.matches(filter, key)) key = MetricKey('Step10/Step1', name) self.assertTrue(MetricResults.matches(filter, key)) key = MetricKey('Top1/Outer1/Inner1', name) filter = MetricsFilter().with_step('Top1/Outer1/Inner1') self.assertTrue(MetricResults.matches(filter, key)) filter = MetricsFilter().with_step('Top1/Outer1') self.assertTrue(MetricResults.matches(filter, key)) filter = MetricsFilter().with_step('Outer1/Inner1') self.assertTrue(MetricResults.matches(filter, key)) filter = MetricsFilter().with_step('Top1/Inner1') self.assertFalse(MetricResults.matches(filter, key)) class MetricsTest(unittest.TestCase): def test_get_namespace_class(self): class MyClass(object): pass self.assertEqual( '{}.{}'.format(MyClass.__module__, MyClass.__name__), Metrics.get_namespace(MyClass)) def test_get_namespace_string(self): namespace = 'MyNamespace' self.assertEqual(namespace, Metrics.get_namespace(namespace)) def test_get_namespace_error(self): with self.assertRaises(ValueError): Metrics.get_namespace(object()) def test_counter_empty_name(self): with self.assertRaises(ValueError): Metrics.counter("namespace", "") def test_counter_empty_namespace(self): with self.assertRaises(ValueError): Metrics.counter("", "names") def test_distribution_empty_name(self): with self.assertRaises(ValueError): Metrics.distribution("namespace", "") def test_distribution_empty_namespace(self): with self.assertRaises(ValueError): Metrics.distribution("", "names") # Do not change the behaviour of str(), do tno update/delete this test case # if the behaviour of str() is changed. Doing so will # break end user beam code which depends on the str() behaviour. def test_user_metric_name_str(self): mn = MetricName("my_namespace", "my_name") expected_str = 'MetricName(namespace=my_namespace, name=my_name)' self.assertEqual(str(mn), expected_str) def test_general_urn_metric_name_str(self): mn = MetricName( "my_namespace", "my_name", urn='my_urn', labels={'key': 'value'}) expected_str = ( "MetricName(namespace=my_namespace, name=my_name, " "urn=my_urn, labels={'key': 'value'})") self.assertEqual(str(mn), expected_str) @pytest.mark.it_validatesrunner def test_user_counter_using_pardo(self): class SomeDoFn(beam.DoFn): """A custom dummy DoFn using yield.""" static_counter_elements = metrics.Metrics.counter( "SomeDoFn", 'metrics_static_counter_element') def __init__(self): self.user_counter_elements = metrics.Metrics.counter( self.__class__, 'metrics_user_counter_element') def process(self, element): self.static_counter_elements.inc(2) self.user_counter_elements.inc() distro = Metrics.distribution(self.__class__, 'element_dist') distro.update(element) yield element pipeline = TestPipeline() nums = pipeline | 'Input' >> beam.Create([1, 2, 3, 4]) results = nums | 'ApplyPardo' >> beam.ParDo(SomeDoFn()) assert_that(results, equal_to([1, 2, 3, 4])) res = pipeline.run() res.wait_until_finish() # Verify static counter. metric_results = ( res.metrics().query( MetricsFilter().with_metric(SomeDoFn.static_counter_elements))) outputs_static_counter = metric_results['counters'][0] self.assertEqual( outputs_static_counter.key.metric.name, 'metrics_static_counter_element') self.assertEqual(outputs_static_counter.committed, 8) # Verify user counter. metric_results = ( res.metrics().query( MetricsFilter().with_name('metrics_user_counter_element'))) outputs_user_counter = metric_results['counters'][0] self.assertEqual( outputs_user_counter.key.metric.name, 'metrics_user_counter_element') self.assertEqual(outputs_user_counter.committed, 4) # Verify user distribution counter. metric_results = res.metrics().query() matcher = MetricResultMatcher( step=hc.contains_string('ApplyPardo'), namespace=hc.contains_string('SomeDoFn'), name='element_dist', committed=DistributionMatcher( sum_value=hc.greater_than_or_equal_to(0), count_value=hc.greater_than_or_equal_to(0), min_value=hc.greater_than_or_equal_to(0), max_value=hc.greater_than_or_equal_to(0))) hc.assert_that(metric_results['distributions'], hc.has_item(matcher)) def test_create_counter_distribution(self): sampler = statesampler.StateSampler('', counters.CounterFactory()) statesampler.set_current_tracker(sampler) state1 = sampler.scoped_state( 'mystep', 'myState', metrics_container=MetricsContainer('mystep')) try: sampler.start() with state1: counter_ns = 'aCounterNamespace' distro_ns = 'aDistributionNamespace' name = 'a_name' counter = Metrics.counter(counter_ns, name) distro = Metrics.distribution(distro_ns, name) counter.inc(10) counter.dec(3) distro.update(10) distro.update(2) self.assertTrue(isinstance(counter, Metrics.DelegatingCounter)) self.assertTrue(isinstance(distro, Metrics.DelegatingDistribution)) del distro del counter container = MetricsEnvironment.current_container() self.assertEqual( container.get_counter(MetricName(counter_ns, name)).get_cumulative(), 7) self.assertEqual( container.get_distribution(MetricName(distro_ns, name)).get_cumulative(), DistributionData(12, 2, 2, 10)) finally: sampler.stop() class LineageTest(unittest.TestCase): def test_fq_name(self): test_cases = { "apache-beam": "apache-beam", "`apache-beam`": "`apache-beam`", "apache.beam": "`apache.beam`", "apache:beam": "`apache:beam`", "apache beam": "`apache beam`", "`apache beam`": "`apache beam`", "apache\tbeam": "`apache\tbeam`", "apache\nbeam": "`apache\nbeam`" } for k, v in test_cases.items(): self.assertEqual("apache:" + v, Lineage.get_fq_name("apache", k)) self.assertEqual( "apache:beam:" + v, Lineage.get_fq_name("apache", k, subtype="beam")) self.assertEqual( "apache:beam:" + v + '.' + v, Lineage.get_fq_name("apache", k, k, subtype="beam")) def test_add(self): lineage = Lineage(Lineage.SOURCE) added = set() # override lineage.metric = added lineage.add("s", "1", "2") lineage.add("s:3.4") lineage.add("s", "5", "6.7") lineage.add("s", "1", "2", subtype="t") lineage.add("sys", "seg1", "seg2", "seg3/part2/part3", last_segment_sep='/') self.assertSetEqual( added, {('s:', '1.', '2'), ('s:3.4:', ), ('s:', '5.', '6.7'), ('s:', 't:', '1.', '2'), ('sys:', 'seg1.', 'seg2.', 'seg3/', 'part2/', 'part3')}) class HistogramTest(unittest.TestCase): def test_histogram(self): class WordExtractingDoFn(beam.DoFn): def __init__(self): self.word_lengths_dist = Metrics.histogram( self.__class__, 'latency_histogram_ms', LinearBucket(0, 1, num_buckets=10)) def process(self, element): text_line = element.strip() words = re.findall(r'[\w\']+', text_line, re.UNICODE) for w in words: self.word_lengths_dist.update(len(w)) return words with beam.Pipeline(runner=BundleBasedDirectRunner()) as p: lines = p | 'read' >> beam.Create(["x x x yyyyyy yyyyyy yyyyyy"]) _ = ( lines | 'split' >> (beam.ParDo(WordExtractingDoFn()).with_output_types(str))) result = p.result filter = MetricsFilter().with_name('latency_histogram_ms') query_result = result.metrics().query(filter) histogram = query_result['histograms'][0].committed.histogram assert histogram._buckets == {1: 3, 6: 3} assert histogram.total_count() == 6 assert 1 < histogram.get_linear_interpolation(0.50) < 3 assert histogram.get_linear_interpolation(0.99) > 3 if __name__ == '__main__': unittest.main()