From fdd30e5e6a7b95e48fc1f93707fb38d8c6740543 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Mon, 30 Jun 2025 18:57:08 +0200 Subject: [PATCH 1/9] chore(ci): Basic slo breach prototype --- .../benchmarks/bp-runner.fail-on-breach.yml | 46 +++++++++++++++++++ .gitlab/macrobenchmarks.yml | 36 +++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 .gitlab/benchmarks/bp-runner.fail-on-breach.yml diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml new file mode 100644 index 00000000000..ec8abe4da9d --- /dev/null +++ b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml @@ -0,0 +1,46 @@ +# Example of measurements can be seen here: +# https://benchmarking.us1.prod.dog/benchmarks?benchmarkGroupPipelineId=66629462&benchmarkGroupSha=16054515e292a66c5eaf79b9ea62df6f348cd67e&page=1&ciJobDateStart=1746309551994&ciJobDateEnd=1748901551994&benchmarkId=14167634 + +# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/spaces/APMINT/pages/5070193198/How+to+set+up+pre-release+performance+quality+gates#How-to-choose-thresholds-for-pre-release-gates%3F + +experiments: + - name: Run SLO breach check + steps: + - name: SLO breach check + run: fail_on_breach + # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-a-warning-range-for-pre-release-gates%3F + warning_range: 10 + # File spec + # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#Specification + # Measurements + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario + scenarios: + + # Standard macrobenchmarks + - name: normal_operation/only-tracing + thresholds: + - agg_http_req_duration_p50 < 2.28 ms + - name: normal_operation/only-tracing + thresholds: + - agg_http_req_duration_p99 < 7.45 ms + - name: normal_operation/otel-latest + thresholds: + - agg_http_req_duration_p50 < 2.28 ms + - name: normal_operation/otel-latest + thresholds: + - agg_http_req_duration_p99 < 9.00 ms + + - name: 
high_load/only-tracing + thresholds: + - throughput > 1400.0 op/s + - name: high_load/otel-latest + thresholds: + - throughput > 1400.0 op/s + + # Startup macrobenchmarks + - name: "startup:petclinic:(tracing|appsec|iast):GlobalTracer" + thresholds: + - execution_time < 245 ms + - name: "startup:petclinic:profiling:GlobalTracer" + thresholds: + - execution_time < 368 ms diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index b408b67ec94..ab7dfde01bb 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -1,3 +1,8 @@ +include: + project: 'DataDog/benchmarking-platform-tools' + file: 'images/templates/gitlab/notify-slo-breaches.template.yml' + ref: '925e0a3e7dd628885f6fc69cdaea5c8cc9e212bc' + .macrobenchmarks: stage: macrobenchmarks rules: @@ -68,3 +73,34 @@ otel-latest: BP_BENCHMARKS_CONFIGURATION: otel-latest TRACER_OPTS: -javaagent:/app/otel-java-agent.jar -Ddd.env=otel-latest -Ddd.service=bp-java-petclinic JAVA_OPTS: -javaagent:/app/memcheck/stability-testing-memwatch.jar -Xmx128M + + +check-slo-breaches: + stage: macrobenchmarks + when: always + tags: ["arch:amd64"] + image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest + needs: + - job: baseline + artifacts: true + - job: only-tracing + artifacts: true + - job: otel-latest + artifacts: true + artifacts: + name: "artifacts" + when: always + paths: + - platform/artifacts/ + expire_in: 3 months + script: + - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" + - bp-runner .gitlab/benchmarks/bp-runner.fail-on-breach.yml + +notify-slo-breaches: + extends: .notify-slo-breaches + stage: macrobenchmarks + needs: ["check-slo-breaches"] + when: always + variables: + CHANNEL: "apm-release-platform" From 6407c4230096a3eddc072c9ac39e736b2292c27f Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Wed, 2 Jul 2025 15:26:46 +0200 Subject: [PATCH 2/9] chore(ci): PR review --- .gitlab/benchmarks/bp-runner.fail-on-breach.yml | 9 +-------- 1 file changed, 1 
insertion(+), 8 deletions(-) diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml index ec8abe4da9d..762f31e98e8 100644 --- a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml +++ b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml @@ -1,7 +1,4 @@ -# Example of measurements can be seen here: -# https://benchmarking.us1.prod.dog/benchmarks?benchmarkGroupPipelineId=66629462&benchmarkGroupSha=16054515e292a66c5eaf79b9ea62df6f348cd67e&page=1&ciJobDateStart=1746309551994&ciJobDateEnd=1748901551994&benchmarkId=14167634 - -# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/spaces/APMINT/pages/5070193198/How+to+set+up+pre-release+performance+quality+gates#How-to-choose-thresholds-for-pre-release-gates%3F +# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-thresholds-for-pre-release-gates%3F experiments: - name: Run SLO breach check @@ -20,14 +17,10 @@ experiments: - name: normal_operation/only-tracing thresholds: - agg_http_req_duration_p50 < 2.28 ms - - name: normal_operation/only-tracing - thresholds: - agg_http_req_duration_p99 < 7.45 ms - name: normal_operation/otel-latest thresholds: - agg_http_req_duration_p50 < 2.28 ms - - name: normal_operation/otel-latest - thresholds: - agg_http_req_duration_p99 < 9.00 ms - name: high_load/only-tracing From 736aefdcd5e9acd03be9e0ea3567db707458378b Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Wed, 9 Jul 2025 18:38:27 +0200 Subject: [PATCH 3/9] chore: Collect reports from benchmarks --- .gitlab/macrobenchmarks.yml | 44 ++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index ab7dfde01bb..53098878e76 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -62,6 +62,8 @@ baseline: only-tracing: extends: .macrobenchmarks + rules: + - when: on_success variables: 
BP_BENCHMARKS_CONFIGURATION: only-tracing TRACER_OPTS: -javaagent:/app/dd-java-agent.jar -Ddd.env=${BP_BENCHMARKS_CONFIGURATION} -Ddd.service=bp-java-petclinic @@ -77,9 +79,10 @@ otel-latest: check-slo-breaches: stage: macrobenchmarks - when: always + interruptible: true tags: ["arch:amd64"] image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest + when: on_success needs: - job: baseline artifacts: true @@ -87,15 +90,40 @@ check-slo-breaches: artifacts: true - job: otel-latest artifacts: true - artifacts: - name: "artifacts" - when: always - paths: - - platform/artifacts/ - expire_in: 3 months + - job: benchmarks-startup + artifacts: true + - job: benchmarks-load + artifacts: true + - job: benchmarks-dacapo + artifacts: true script: - - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" + # macrobenchmarks are located here, files are already in "converted" format + - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}" + + # Need to move the artifacts the benchmarks-* job + - | + export BENCHMARKS_ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${BENCHMARKS_ARTIFACTS_DIR}" + for benchmarkType in startup load; do + find "$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do + relpath="${file#$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType/}" + prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json + prefix="${prefix#./}" # Remove any leading ./ + prefix="${prefix//\//-}" # Replace / with - + case "$file" in + *benchmark-baseline.json) type="baseline" ;; + *benchmark-candidate.json) type="candidate" ;; + esac + echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json" + cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json" + done + done + - ls -lah "$ARTIFACTS_DIR" - bp-runner .gitlab/benchmarks/bp-runner.fail-on-breach.yml + variables: + UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current 
project. This ID is unique across all projects on the GitLab instance. + UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java" + UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built. + UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for. notify-slo-breaches: extends: .notify-slo-breaches From 361aaf6e6c0139e3e9436b5ef4a1e9b2cb1e9fe3 Mon Sep 17 00:00:00 2001 From: Dmytro Yurchenko Date: Fri, 11 Jul 2025 15:28:00 +0200 Subject: [PATCH 4/9] tweak: Include benchmarkType in file name --- .gitlab/macrobenchmarks.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index 53098878e76..70a04bc0738 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -6,12 +6,7 @@ include: .macrobenchmarks: stage: macrobenchmarks rules: - - if: $POPULATE_CACHE - when: never - - if: ($NIGHTLY_BENCHMARKS || $CI_PIPELINE_SOURCE != "schedule") && $CI_COMMIT_REF_NAME == "master" - when: always - - when: manual - allow_failure: true + - when: on_success # TODO: PLEASE revert before merging the PR tags: ["runner:apm-k8s-same-cpu"] needs: ["build"] interruptible: true @@ -62,8 +57,6 @@ baseline: only-tracing: extends: .macrobenchmarks - rules: - - when: on_success variables: BP_BENCHMARKS_CONFIGURATION: only-tracing TRACER_OPTS: -javaagent:/app/dd-java-agent.jar -Ddd.env=${BP_BENCHMARKS_CONFIGURATION} -Ddd.service=bp-java-petclinic @@ -113,8 +106,8 @@ check-slo-breaches: *benchmark-baseline.json) type="baseline" ;; *benchmark-candidate.json) type="candidate" ;; esac - echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json" - cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json" + echo "Moving $file to $ARTIFACTS_DIR/${type}-${benchmarkType}-${prefix}.converted.json" + cp "$file" "$ARTIFACTS_DIR/${type}-${benchmarkType}-${prefix}.converted.json" done done - ls -lah "$ARTIFACTS_DIR" From 
92f9783853e270d2bdc23580fd90280b0baddc50 Mon Sep 17 00:00:00 2001 From: Dmytro Yurchenko Date: Fri, 11 Jul 2025 15:28:53 +0200 Subject: [PATCH 5/9] tweak: Store artifacts that were used for checking regression, so we can debug --- .gitlab/macrobenchmarks.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index 70a04bc0738..d435f00ba5c 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -112,6 +112,12 @@ check-slo-breaches: done - ls -lah "$ARTIFACTS_DIR" - bp-runner .gitlab/benchmarks/bp-runner.fail-on-breach.yml + artifacts: + name: "artifacts" + when: always + paths: + - platform/artifacts/ + expire_in: 1 week variables: UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance. UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java" From 67435a9ab7543f0858e95f96b2a66dcafcf7dcc0 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Fri, 11 Jul 2025 17:44:17 +0200 Subject: [PATCH 6/9] chore(ci): Tweak thresholds --- .../benchmarks/bp-runner.fail-on-breach.yml | 23 +++++++++++++------ .gitlab/macrobenchmarks.yml | 8 ++++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml index 762f31e98e8..b44e2f0e963 100644 --- a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml +++ b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml @@ -12,28 +12,37 @@ experiments: # Measurements # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario scenarios: + # Note that the thresholds below are chosen based on the confidence interval with a 10% adjustment.
# Standard macrobenchmarks + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario - name: normal_operation/only-tracing thresholds: - - agg_http_req_duration_p50 < 2.28 ms - - agg_http_req_duration_p99 < 7.45 ms + - agg_http_req_duration_p50 < 2.36 ms + - agg_http_req_duration_p99 < 7.89 ms + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario - name: normal_operation/otel-latest thresholds: - - agg_http_req_duration_p50 < 2.28 ms - - agg_http_req_duration_p99 < 9.00 ms + - agg_http_req_duration_p50 < 2.34 ms + - agg_http_req_duration_p99 < 9.50 ms + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario - name: high_load/only-tracing thresholds: - - throughput > 1400.0 op/s + - throughput > 1100.0 op/s + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fotel-latest&trendsType=scenario - name: high_load/otel-latest thresholds: - - throughput > 1400.0 op/s + - throughput > 1100.0 op/s # Startup macrobenchmarks + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Atracing%3AGlobalTracer&trendsType=scenario + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aappsec%3AGlobalTracer&trendsType=scenario + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aiast%3AGlobalTracer&trendsType=scenario - name: "startup:petclinic:(tracing|appsec|iast):GlobalTracer" thresholds: - - execution_time < 245 ms + - execution_time < 260 ms + # 
https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aprofiling%3AGlobalTracer&trendsType=scenario - name: "startup:petclinic:profiling:GlobalTracer" thresholds: - execution_time < 368 ms diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index d435f00ba5c..ec7c7647208 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -6,6 +6,12 @@ include: .macrobenchmarks: stage: macrobenchmarks rules: +# - if: $POPULATE_CACHE +# when: never +# - if: ($NIGHTLY_BENCHMARKS || $CI_PIPELINE_SOURCE != "schedule") && $CI_COMMIT_REF_NAME == "master" +# when: always +# - when: manual +# allow_failure: true - when: on_success # TODO: PLEASE revert before merging the PR tags: ["runner:apm-k8s-same-cpu"] needs: ["build"] @@ -96,7 +102,7 @@ check-slo-breaches: # Need to move the artifacts the benchmarks-* job - | export BENCHMARKS_ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${BENCHMARKS_ARTIFACTS_DIR}" - for benchmarkType in startup load; do + for benchmarkType in startup load dacapo; do find "$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do relpath="${file#$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType/}" prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json From 616ae973d75a9e4d681df57f613feebdd2125d9c Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Thu, 17 Jul 2025 11:26:30 +0200 Subject: [PATCH 7/9] chore(ci): Tweak to recommended thresholds See https://github.com/DataDog/dd-trace-java/pull/9068/files#r2210474360 --- .gitlab/benchmarks/bp-runner.fail-on-breach.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml index b44e2f0e963..eefdde73508 100644 --- a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml +++ 
b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml @@ -23,8 +23,8 @@ experiments: # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario - name: normal_operation/otel-latest thresholds: - - agg_http_req_duration_p50 < 2.34 ms - - agg_http_req_duration_p99 < 9.50 ms + - agg_http_req_duration_p50 < 2.5 ms + - agg_http_req_duration_p99 < 10 ms # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario - name: high_load/only-tracing @@ -41,8 +41,8 @@ experiments: # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aiast%3AGlobalTracer&trendsType=scenario - name: "startup:petclinic:(tracing|appsec|iast):GlobalTracer" thresholds: - - execution_time < 260 ms + - execution_time < 280 ms # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aprofiling%3AGlobalTracer&trendsType=scenario - name: "startup:petclinic:profiling:GlobalTracer" thresholds: - - execution_time < 368 ms + - execution_time < 420 ms From 98f9a310ab3a6c31f7f7c27203ecdb930c70e478 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Thu, 17 Jul 2025 13:38:25 +0200 Subject: [PATCH 8/9] chore(ci): Another tweak to recommended thresholds See https://github.com/DataDog/dd-trace-java/pull/9068/files#r2210474360 --- .gitlab/benchmarks/bp-runner.fail-on-breach.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml index eefdde73508..bb2211a27fe 100644 --- a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml +++ b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml @@ -18,8 +18,8 @@ experiments: # 
https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario - name: normal_operation/only-tracing thresholds: - - agg_http_req_duration_p50 < 2.36 ms - - agg_http_req_duration_p99 < 7.89 ms + - agg_http_req_duration_p50 < 2.6 ms + - agg_http_req_duration_p99 < 8.5 ms # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario - name: normal_operation/otel-latest thresholds: From 098e01236c9fba95f7646fc6cba26c7726441e28 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Thu, 17 Jul 2025 14:49:37 +0200 Subject: [PATCH 9/9] chore(ci): Revert hack to run the release gate (it needed all the macrobenchmarks) --- .gitlab/macrobenchmarks.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index ec7c7647208..cbf3fb00588 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -6,13 +6,12 @@ include: .macrobenchmarks: stage: macrobenchmarks rules: -# - if: $POPULATE_CACHE -# when: never -# - if: ($NIGHTLY_BENCHMARKS || $CI_PIPELINE_SOURCE != "schedule") && $CI_COMMIT_REF_NAME == "master" -# when: always -# - when: manual -# allow_failure: true - - when: on_success # TODO: PLEASE revert before merging the PR + - if: $POPULATE_CACHE + when: never + - if: ($NIGHTLY_BENCHMARKS || $CI_PIPELINE_SOURCE != "schedule") && $CI_COMMIT_REF_NAME == "master" + when: always + - when: manual + allow_failure: true tags: ["runner:apm-k8s-same-cpu"] needs: ["build"] interruptible: true