From f2251ea163d3b3dac99c9a789b697659af40c235 Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Mon, 21 Oct 2019 20:06:34 -0700 Subject: [PATCH 1/9] Make ProbabilitySampler check high bytes --- .../src/opentelemetry/trace/sampling.py | 10 +++--- .../tests/trace/test_sampling.py | 36 +++++++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index f16e80495bf..cc57b122818 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -80,13 +80,13 @@ def __init__(self, rate: float): self._rate = rate self._bound = self.get_bound_for_rate(self._rate) - # The sampler checks the last 8 bytes of the trace ID to decide whether to - # sample a given trace. - CHECK_BYTES = 0xFFFFFFFFFFFFFFFF + # The sampler checks the 8 high-order bytes of the trace ID to decide + # whether to sample a given trace. + CHECK_HIGH_BITS = 64 @classmethod def get_bound_for_rate(cls, rate: float) -> int: - return round(rate * (cls.CHECK_BYTES + 1)) + return round(rate * 2 ** cls.CHECK_HIGH_BITS) @property def rate(self) -> float: @@ -112,7 +112,7 @@ def should_sample( if parent_context is not None: return Decision(parent_context.trace_options.sampled) - return Decision(trace_id & self.CHECK_BYTES < self.bound) + return Decision(trace_id >> self.CHECK_HIGH_BITS < self.bound) # Samplers that ignore the parent sampling decision and never/always sample. 
diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py index b456aa91f18..ccb5da8c159 100644 --- a/opentelemetry-api/tests/trace/test_sampling.py +++ b/opentelemetry-api/tests/trace/test_sampling.py @@ -121,12 +121,12 @@ def test_probability_sampler(self): # null self.assertTrue( sampler.should_sample( - None, 0x7FFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" + None, 0x7FFFFFFFFFFFFFFF0000000000000000, 0xDEADBEEF, "span name" ).sampled ) self.assertFalse( sampler.should_sample( - None, 0x8000000000000000, 0xDEADBEEF, "span name" + None, 0x80000000000000000000000000000000, 0xDEADBEEF, "span name" ).sampled ) @@ -137,7 +137,7 @@ def test_probability_sampler(self): trace.SpanContext( 0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT ), - 0x8000000000000000, + 0x7FFFFFFFFFFFFFFF0000000000000000, 0xDEADBEEF, "span name", ).sampled @@ -147,7 +147,7 @@ def test_probability_sampler(self): trace.SpanContext( 0xDEADBEF0, 0xDEADBEF1, trace_options=TO_SAMPLED ), - 0x8000000000000001, + 0x80000000000000000000000000000000, 0xDEADBEEF, "span name", ).sampled @@ -165,7 +165,10 @@ def test_probability_sampler_one(self): default_off = sampling.ProbabilitySampler(1.0) self.assertTrue( default_off.should_sample( - None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" + None, + 0xFFFFFFFFFFFFFFFF0000000000000000, + 0xDEADBEEF, + "span name", ).sampled ) @@ -182,26 +185,29 @@ def test_probability_sampler_limits(self): ) self.assertFalse( almost_always_off.should_sample( - None, 0x1, 0xDEADBEEF, "span name" + None, 0x10000000000000000, 0xDEADBEEF, "span name" ).sampled ) self.assertEqual( sampling.ProbabilitySampler.get_bound_for_rate(2 ** -64), 0x1 ) - # Sample every trace with (last 8 bytes of) trace ID less than - # 0xffffffffffffffff. In principle this is the highest possible - # sampling rate less than 1, but we can't actually express this rate as - # a float! 
+ # Sample every trace with trace ID less than + # 0xffffffffffffffff0000000000000000. In principle this is the highest + # possible sampling rate less than 1, but we can't actually express + # this rate as a float! # # In practice, the highest possible sampling rate is: # - # round(sys.float_info.epsilon * 2 ** 64) + # 1 - sys.float_info.epsilon almost_always_on = sampling.ProbabilitySampler(1 - 2 ** -64) self.assertTrue( almost_always_on.should_sample( - None, 0xFFFFFFFFFFFFFFFE, 0xDEADBEEF, "span name" + None, + 0xFFFFFFFFFFFFFFFE0000000000000000, + 0xDEADBEEF, + "span name", ).sampled ) @@ -212,12 +218,12 @@ def test_probability_sampler_limits(self): # self.assertFalse( # almost_always_on.should_sample( # None, - # 0xffffffffffffffff, - # 0xdeadbeef, + # 0xFFFFFFFFFFFFFFFF0000000000000000, + # 0xDEADBEEF, # "span name", # ).sampled # ) # self.assertEqual( # sampling.ProbabilitySampler.get_bound_for_rate(1 - 2 ** -64)), - # 0xffffffffffffffff, + # 0xFFFFFFFFFFFFFFFF0000000000000000, # ) From bdee2f9b610d1e1119b8cf17310a730018087605 Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Thu, 24 Oct 2019 17:03:31 -0700 Subject: [PATCH 2/9] Blacken --- opentelemetry-api/tests/trace/test_sampling.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py index ccb5da8c159..5ab9bdc33fb 100644 --- a/opentelemetry-api/tests/trace/test_sampling.py +++ b/opentelemetry-api/tests/trace/test_sampling.py @@ -121,12 +121,18 @@ def test_probability_sampler(self): # null self.assertTrue( sampler.should_sample( - None, 0x7FFFFFFFFFFFFFFF0000000000000000, 0xDEADBEEF, "span name" + None, + 0x7FFFFFFFFFFFFFFF0000000000000000, + 0xDEADBEEF, + "span name", ).sampled ) self.assertFalse( sampler.should_sample( - None, 0x80000000000000000000000000000000, 0xDEADBEEF, "span name" + None, + 0x80000000000000000000000000000000, + 0xDEADBEEF, + "span name", ).sampled ) From 
381c41ff5890e99a674ce81e2cd5ab635e14f29a Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Fri, 25 Oct 2019 13:59:33 -0700 Subject: [PATCH 3/9] Types fix, slight optimization This change is needed because mypy isn't willing to believe the exponent is always positive. See https://github.com/python/typeshed/issues/285. --- opentelemetry-api/src/opentelemetry/trace/sampling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index cc57b122818..a5981109365 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -83,10 +83,11 @@ def __init__(self, rate: float): # The sampler checks the 8 high-order bytes of the trace ID to decide # whether to sample a given trace. CHECK_HIGH_BITS = 64 + TRACE_ID_LIMIT = 2 ** CHECK_HIGH_BITS # type: int @classmethod def get_bound_for_rate(cls, rate: float) -> int: - return round(rate * 2 ** cls.CHECK_HIGH_BITS) + return round(rate * cls.TRACE_ID_LIMIT) @property def rate(self) -> float: From bd16b671a9c05c85e61cbb752dad6ef625d0e8fa Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Fri, 7 Feb 2020 15:56:30 -0800 Subject: [PATCH 4/9] Update sampler to check low-order bits --- .../src/opentelemetry/trace/sampling.py | 9 ++- .../tests/trace/test_sampling.py | 56 ++++++++++--------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index a5981109365..71bebfe8ea7 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -80,10 +80,9 @@ def __init__(self, rate: float): self._rate = rate self._bound = self.get_bound_for_rate(self._rate) - # The sampler checks the 8 high-order bytes of the trace ID to decide - # whether to sample a given trace. 
- CHECK_HIGH_BITS = 64 - TRACE_ID_LIMIT = 2 ** CHECK_HIGH_BITS # type: int + # For compatability with 64 bit trace IDs, the sampler checks the 64 + # low-order bits of the trace ID to decide whether to sample a given trace. + TRACE_ID_LIMIT = (1 << 64) - 1 @classmethod def get_bound_for_rate(cls, rate: float) -> int: @@ -113,7 +112,7 @@ def should_sample( if parent_context is not None: return Decision(parent_context.trace_options.sampled) - return Decision(trace_id >> self.CHECK_HIGH_BITS < self.bound) + return Decision(trace_id & self.TRACE_ID_LIMIT < self.bound) # Samplers that ignore the parent sampling decision and never/always sample. diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py index 5ab9bdc33fb..32ec1b45ab3 100644 --- a/opentelemetry-api/tests/trace/test_sampling.py +++ b/opentelemetry-api/tests/trace/test_sampling.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest from opentelemetry import trace @@ -121,18 +122,12 @@ def test_probability_sampler(self): # null self.assertTrue( sampler.should_sample( - None, - 0x7FFFFFFFFFFFFFFF0000000000000000, - 0xDEADBEEF, - "span name", + None, 0x7FFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" ).sampled ) self.assertFalse( sampler.should_sample( - None, - 0x80000000000000000000000000000000, - 0xDEADBEEF, - "span name", + None, 0x8000000000000000, 0xDEADBEEF, "span name" ).sampled ) @@ -143,7 +138,7 @@ def test_probability_sampler(self): trace.SpanContext( 0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT ), - 0x7FFFFFFFFFFFFFFF0000000000000000, + 0x7FFFFFFFFFFFFFFF, 0xDEADBEEF, "span name", ).sampled @@ -153,7 +148,7 @@ def test_probability_sampler(self): trace.SpanContext( 0xDEADBEF0, 0xDEADBEF1, trace_options=TO_SAMPLED ), - 0x80000000000000000000000000000000, + 0x8000000000000000, 0xDEADBEEF, "span name", ).sampled @@ -171,10 +166,7 @@ def test_probability_sampler_one(self): default_off = sampling.ProbabilitySampler(1.0) self.assertTrue( default_off.should_sample( - None, - 0xFFFFFFFFFFFFFFFF0000000000000000, - 0xDEADBEEF, - "span name", + None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" ).sampled ) @@ -191,17 +183,16 @@ def test_probability_sampler_limits(self): ) self.assertFalse( almost_always_off.should_sample( - None, 0x10000000000000000, 0xDEADBEEF, "span name" + None, 0x1, 0xDEADBEEF, "span name" ).sampled ) self.assertEqual( sampling.ProbabilitySampler.get_bound_for_rate(2 ** -64), 0x1 ) - # Sample every trace with trace ID less than - # 0xffffffffffffffff0000000000000000. In principle this is the highest - # possible sampling rate less than 1, but we can't actually express - # this rate as a float! + # Sample every trace with trace ID less than 0xffffffffffffffff. In + # principle this is the highest possible sampling rate less than 1, but + # we can't actually express this rate as a float! 
# # In practice, the highest possible sampling rate is: # @@ -210,10 +201,7 @@ def test_probability_sampler_limits(self): almost_always_on = sampling.ProbabilitySampler(1 - 2 ** -64) self.assertTrue( almost_always_on.should_sample( - None, - 0xFFFFFFFFFFFFFFFE0000000000000000, - 0xDEADBEEF, - "span name", + None, 0xFFFFFFFFFFFFFFFE, 0xDEADBEEF, "span name" ).sampled ) @@ -224,12 +212,30 @@ def test_probability_sampler_limits(self): # self.assertFalse( # almost_always_on.should_sample( # None, - # 0xFFFFFFFFFFFFFFFF0000000000000000, + # 0xFFFFFFFFFFFFFFFF, # 0xDEADBEEF, # "span name", # ).sampled # ) # self.assertEqual( # sampling.ProbabilitySampler.get_bound_for_rate(1 - 2 ** -64)), - # 0xFFFFFFFFFFFFFFFF0000000000000000, + # 0xFFFFFFFFFFFFFFFF, # ) + + # Check that a sampler with the highest effective sampling rate < 1 + # refuses to sample traces with trace ID 0xffffffffffffffff. + almost_almost_always_on = sampling.ProbabilitySampler( + 1 - sys.float_info.epsilon + ) + self.assertFalse( + almost_almost_always_on.should_sample( + None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" + ).sampled + ) + # Check that the higest effective sampling rate is actually lower than + # the highest theoretical sampling rate. If this test fails the test + # above is wrong. + self.assertLess( + almost_almost_always_on.bound + 0xFFFFFFFFFFFFFFFF, + ) From f8602dfc9f21d4753d54520b2179d850ec20b447 Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Fri, 7 Feb 2020 16:02:23 -0800 Subject: [PATCH 5/9] Fix typo --- opentelemetry-api/tests/trace/test_sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py index 32ec1b45ab3..04da521742e 100644 --- a/opentelemetry-api/tests/trace/test_sampling.py +++ b/opentelemetry-api/tests/trace/test_sampling.py @@ -236,6 +236,6 @@ def test_probability_sampler_limits(self): # the highest theoretical sampling rate. 
If this test fails the test # above is wrong. self.assertLess( - almost_almost_always_on.bound + almost_almost_always_on.bound, 0xFFFFFFFFFFFFFFFF, ) From 20fb95c55807539893606fe0dc55221fcf309f9d Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Fri, 7 Feb 2020 16:19:35 -0800 Subject: [PATCH 6/9] Blacken --- opentelemetry-api/tests/trace/test_sampling.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py index 04da521742e..f04aecef45b 100644 --- a/opentelemetry-api/tests/trace/test_sampling.py +++ b/opentelemetry-api/tests/trace/test_sampling.py @@ -236,6 +236,5 @@ def test_probability_sampler_limits(self): # the highest theoretical sampling rate. If this test fails the test # above is wrong. self.assertLess( - almost_almost_always_on.bound, - 0xFFFFFFFFFFFFFFFF, + almost_almost_always_on.bound, 0xFFFFFFFFFFFFFFFF, ) From 9791acc0920cd765238ad4db6bc998106ed9a8f9 Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Mon, 10 Feb 2020 17:32:36 -0800 Subject: [PATCH 7/9] Update opentelemetry-api/src/opentelemetry/trace/sampling.py Co-Authored-By: alrex --- opentelemetry-api/src/opentelemetry/trace/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index a39ca4ced10..d905a3824e3 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -82,7 +82,7 @@ def __init__(self, rate: float): self._rate = rate self._bound = self.get_bound_for_rate(self._rate) - # For compatability with 64 bit trace IDs, the sampler checks the 64 + # For compatibility with 64 bit trace IDs, the sampler checks the 64 # low-order bits of the trace ID to decide whether to sample a given trace. 
TRACE_ID_LIMIT = (1 << 64) - 1 From 18299e2b69b9e293e4e642783cd98fcacaade1ec Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Mon, 10 Feb 2020 18:02:08 -0800 Subject: [PATCH 8/9] Fix OBOE --- opentelemetry-api/src/opentelemetry/trace/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index d905a3824e3..c6ace5e88e2 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -88,7 +88,7 @@ def __init__(self, rate: float): @classmethod def get_bound_for_rate(cls, rate: float) -> int: - return round(rate * cls.TRACE_ID_LIMIT) + return round(rate * cls.TRACE_ID_LIMIT + 1) @property def rate(self) -> float: From 2747f235618f47aa17335908e248f181229a39c4 Mon Sep 17 00:00:00 2001 From: Chris Kleinknecht Date: Mon, 10 Feb 2020 18:47:47 -0800 Subject: [PATCH 9/9] Parens --- opentelemetry-api/src/opentelemetry/trace/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py index c6ace5e88e2..503c2e03eb0 100644 --- a/opentelemetry-api/src/opentelemetry/trace/sampling.py +++ b/opentelemetry-api/src/opentelemetry/trace/sampling.py @@ -88,7 +88,7 @@ def __init__(self, rate: float): @classmethod def get_bound_for_rate(cls, rate: float) -> int: - return round(rate * cls.TRACE_ID_LIMIT + 1) + return round(rate * (cls.TRACE_ID_LIMIT + 1)) @property def rate(self) -> float: