From d9cf9fa9059401ebc46f0619e2d991e8e9271540 Mon Sep 17 00:00:00 2001
From: Joey Zhao <5253430+joeyzhao2018@users.noreply.github.com>
Date: Tue, 4 Mar 2025 15:56:32 -0500
Subject: [PATCH 01/26] chore: remove the hardcoded 2.21.3 version in
 dockerfile (#568)

* remove the hardcoded 2.21.3 version in dockerfile

* Apply suggestions from code review

Co-authored-by: datadog-datadog-prod-us1[bot] <88084959+datadog-datadog-prod-us1[bot]@users.noreply.github.com>

---------

Co-authored-by: Munir Abdinur
Co-authored-by: datadog-datadog-prod-us1[bot] <88084959+datadog-datadog-prod-us1[bot]@users.noreply.github.com>
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 757d671a..7f522e5a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,7 +14,7 @@ ENV PATH=/root/.cargo/bin:$PATH

 # Install datadog_lambda and dependencies from local
 COPY . .
-RUN pip install . ddtrace==2.21.3 -t ./python/lib/$runtime/site-packages
+RUN pip install --no-cache-dir . -t ./python/lib/$runtime/site-packages

 # Remove botocore (40MB) to reduce package size. aws-xray-sdk
 # installs it, while it's already provided by the Lambda Runtime.

From aec49103a8a68362bea6bf1509b7bc51df7a3088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com>
Date: Tue, 4 Mar 2025 15:58:02 -0500
Subject: [PATCH 02/26] add `CODEOWNERS` (#569)

---
 .github/CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/CODEOWNERS

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..e53b2646
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @DataDog/serverless-aws

From 53306e38df8514525649789495baa7cad91eecd6 Mon Sep 17 00:00:00 2001
From: Aleksandr Pasechnik
Date: Tue, 4 Mar 2025 16:45:00 -0500
Subject: [PATCH 03/26] chore: new govcloud release script (#567)

Also refactors the environments template datasource and adds layer
bundling jobs.
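A minimal sketch of the intended staging invocation, following the usage
notes in the new script's header (the bundle filename and region below
are illustrative placeholders, not values from this change):

    # Download the zip produced by the `layer bundle` job in GitLab, then:
    ENVIRONMENT=us1-staging-fed REGIONS=us-gov-west-1 \
        ./scripts/publish_govcloud.sh datadog_lambda_py-bundle-12345.zip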
--- ci/datasources/environments.yaml | 4 +- ci/input_files/build.yaml.tpl | 66 +++++++++++--- ci/publish_layers.sh | 36 ++++++-- scripts/publish_govcloud.sh | 105 ++++++++++++++++++++++ scripts/publish_prod.sh | 144 +++++++++++++------------------ 5 files changed, 255 insertions(+), 100 deletions(-) create mode 100755 scripts/publish_govcloud.sh diff --git a/ci/datasources/environments.yaml b/ci/datasources/environments.yaml index 90056ab0..1ae2b4d7 100644 --- a/ci/datasources/environments.yaml +++ b/ci/datasources/environments.yaml @@ -1,9 +1,9 @@ environments: - - name: sandbox + sandbox: external_id: sandbox-publish-externalid role_to_assume: sandbox-layer-deployer account: 425362996713 - - name: prod + prod: external_id: prod-publish-externalid role_to_assume: dd-serverless-layer-deployer-role account: 464622532012 diff --git a/ci/input_files/build.yaml.tpl b/ci/input_files/build.yaml.tpl index eae6b0d1..769f87f5 100644 --- a/ci/input_files/build.yaml.tpl +++ b/ci/input_files/build.yaml.tpl @@ -103,9 +103,6 @@ integration-test ({{ $runtime.name }}-{{ $runtime.arch }}): script: - RUNTIME_PARAM={{ $runtime.python_version }} ARCH={{ $runtime.arch }} ./scripts/run_integration_tests.sh -{{ range $environment := (ds "environments").environments }} - -{{ if or (eq $environment.name "prod") }} sign-layer ({{ $runtime.name }}-{{ $runtime.arch }}): stage: sign tags: ["arch:amd64"] @@ -128,22 +125,25 @@ sign-layer ({{ $runtime.name }}-{{ $runtime.arch }}): before_script: - apt-get update - apt-get install -y uuid-runtime + {{ with $environment := (ds "environments").environments.prod }} - EXTERNAL_ID_NAME={{ $environment.external_id }} ROLE_TO_ASSUME={{ $environment.role_to_assume }} AWS_ACCOUNT={{ $environment.account }} source ./ci/get_secrets.sh + {{ end }} script: - - LAYER_FILE=datadog_lambda_py-{{ $runtime.arch}}-{{ $runtime.python_version }}.zip ./scripts/sign_layers.sh {{ $environment.name }} -{{ end }} + - LAYER_FILE=datadog_lambda_py-{{ $runtime.arch}}-{{ $runtime.python_version }}.zip ./scripts/sign_layers.sh prod + +{{ range $environment_name, $environment := (ds "environments").environments }} -publish-layer-{{ $environment.name }} ({{ $runtime.name }}-{{ $runtime.arch }}): +publish-layer-{{ $environment_name }} ({{ $runtime.name }}-{{ $runtime.arch }}): stage: publish tags: ["arch:amd64"] image: registry.ddbuild.io/images/docker:20.10-py3 rules: - - if: '"{{ $environment.name }}" =~ /^(sandbox|staging)/' + - if: '"{{ $environment_name }}" == "sandbox"' when: manual allow_failure: true - if: '$CI_COMMIT_TAG =~ /^v.*/' needs: -{{ if or (eq $environment.name "prod") }} +{{ if or (eq $environment_name "prod") }} - sign-layer ({{ $runtime.name }}-{{ $runtime.arch}}) {{ else }} - build-layer ({{ $runtime.name }}-{{ $runtime.arch }}) @@ -153,7 +153,7 @@ publish-layer-{{ $environment.name }} ({{ $runtime.name }}-{{ $runtime.arch }}): - integration-test ({{ $runtime.name }}-{{ $runtime.arch }}) {{ end }} dependencies: -{{ if or (eq $environment.name "prod") }} +{{ if or (eq $environment_name "prod") }} - sign-layer ({{ $runtime.name }}-{{ $runtime.arch}}) {{ else }} - build-layer ({{ $runtime.name }}-{{ $runtime.arch }}) @@ -166,7 +166,7 @@ publish-layer-{{ $environment.name }} ({{ $runtime.name }}-{{ $runtime.arch }}): before_script: - EXTERNAL_ID_NAME={{ $environment.external_id }} ROLE_TO_ASSUME={{ $environment.role_to_assume }} AWS_ACCOUNT={{ $environment.account }} source ./ci/get_secrets.sh script: - - STAGE={{ $environment.name }} PYTHON_VERSION={{ $runtime.python_version }} ARCH={{ 
$runtime.arch }} ./ci/publish_layers.sh + - STAGE={{ $environment_name }} PYTHON_VERSION={{ $runtime.python_version }} ARCH={{ $runtime.arch }} ./ci/publish_layers.sh {{- end }} @@ -186,3 +186,49 @@ publish-pypi-package: {{- end }} script: - ./ci/publish_pypi.sh + +layer bundle: + stage: build + tags: ["arch:amd64"] + image: registry.ddbuild.io/images/docker:20.10 + needs: + {{ range (ds "runtimes").runtimes }} + - build-layer ({{ .name }}-{{ .arch }}) + {{ end }} + dependencies: + {{ range (ds "runtimes").runtimes }} + - build-layer ({{ .name }}-{{ .arch }}) + {{ end }} + artifacts: + expire_in: 1 hr + paths: + - datadog_lambda_py-bundle-${CI_JOB_ID}/ + name: datadog_lambda_py-bundle-${CI_JOB_ID} + script: + - rm -rf datadog_lambda_py-bundle-${CI_JOB_ID} + - mkdir -p datadog_lambda_py-bundle-${CI_JOB_ID} + - cp .layers/datadog_lambda_py-*.zip datadog_lambda_py-bundle-${CI_JOB_ID} + +signed layer bundle: + stage: sign + image: registry.ddbuild.io/images/docker:20.10-py3 + tags: ["arch:amd64"] + rules: + - if: '$CI_COMMIT_TAG =~ /^v.*/' + needs: + {{ range (ds "runtimes").runtimes }} + - sign-layer ({{ .name }}-{{ .arch }}) + {{ end }} + dependencies: + {{ range (ds "runtimes").runtimes }} + - sign-layer ({{ .name }}-{{ .arch }}) + {{ end }} + artifacts: + expire_in: 1 day + paths: + - datadog_lambda_py-signed-bundle-${CI_JOB_ID}/ + name: datadog_lambda_py-signed-bundle-${CI_JOB_ID} + script: + - rm -rf datadog_lambda_py-signed-bundle-${CI_JOB_ID} + - mkdir -p datadog_lambda_py-signed-bundle-${CI_JOB_ID} + - cp .layers/datadog_lambda_py-*.zip datadog_lambda_py-signed-bundle-${CI_JOB_ID} diff --git a/ci/publish_layers.sh b/ci/publish_layers.sh index 85317ddd..58257bf1 100755 --- a/ci/publish_layers.sh +++ b/ci/publish_layers.sh @@ -24,7 +24,20 @@ AWS_CLI_PYTHON_VERSIONS=( "python3.13" "python3.13" ) -PYTHON_VERSIONS=("3.8-amd64" "3.8-arm64" "3.9-amd64" "3.9-arm64" "3.10-amd64" "3.10-arm64" "3.11-amd64" "3.11-arm64" "3.12-amd64" "3.12-arm64" "3.13-amd64" "3.13-arm64") +PYTHON_VERSIONS=( + "3.8-amd64" + "3.8-arm64" + "3.9-amd64" + "3.9-arm64" + "3.10-amd64" + "3.10-arm64" + "3.11-amd64" + "3.11-arm64" + "3.12-amd64" + "3.12-arm64" + "3.13-amd64" + "3.13-arm64" +) LAYER_PATHS=( ".layers/datadog_lambda_py-amd64-3.8.zip" ".layers/datadog_lambda_py-arm64-3.8.zip" @@ -53,11 +66,16 @@ LAYERS=( "Datadog-Python313" "Datadog-Python313-ARM" ) -STAGES=('prod', 'sandbox', 'staging') +STAGES=('prod', 'sandbox', 'staging', 'gov-staging', 'gov-prod') printf "Starting script...\n\n" -printf "Installing dependencies\n" -pip install awscli + +if [ -z "$SKIP_PIP_INSTALL" ]; then + echo "Installing dependencies" + pip install awscli +else + echo "Skipping pip install" +fi publish_layer() { region=$1 @@ -89,7 +107,7 @@ fi printf "Python version specified: $PYTHON_VERSION\n" if [[ ! ${PYTHON_VERSIONS[@]} =~ $PYTHON_VERSION ]]; then - printf "[Error] Unsupported PYTHON_VERSION found.\n" + printf "[Error] Unsupported PYTHON_VERSION found: $PYTHON_VERSION.\n" exit 1 fi @@ -133,8 +151,14 @@ if [[ ! 
${STAGES[@]} =~ $STAGE ]]; then fi layer="${LAYERS[$index]}" +if [ -z "$LAYER_NAME_SUFFIX" ]; then + echo "No layer name suffix" +else + layer="${layer}-${LAYER_NAME_SUFFIX}" +fi +echo "layer name: $layer" -if [[ "$STAGE" =~ ^(staging|sandbox)$ ]]; then +if [[ "$STAGE" =~ ^(staging|sandbox|gov-staging)$ ]]; then # Deploy latest version latest_version=$(aws lambda list-layer-versions --region $REGION --layer-name $layer --query 'LayerVersions[0].Version || `0`') VERSION=$(($latest_version + 1)) diff --git a/scripts/publish_govcloud.sh b/scripts/publish_govcloud.sh new file mode 100755 index 00000000..5fd107b0 --- /dev/null +++ b/scripts/publish_govcloud.sh @@ -0,0 +1,105 @@ +#! /usr/bin/env bash + +# Unless explicitly stated otherwise all files in this repository are licensed +# under the Apache License Version 2.0. +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2025 Datadog, Inc. +# +# USAGE: download the layer bundle from the build pipeline in gitlab. Use the +# Download button on the `layer bundle` job. This will be a zip file containing +# all of the required layers. Run this script as follows: +# +# ENVIRONMENT=[us1-staging-fed or us1-fed] [LAYER_NAME_SUFFIX=optional-layer-suffix] [REGIONS=us-gov-west-1] ./scripts/publish_govcloud.sh +# +# protip: you can drag the zip file from finder into your terminal to insert +# its path. + +set -e + +LAYER_PACKAGE=$1 + +if [ -z "$LAYER_PACKAGE" ]; then + printf "[ERROR]: layer package not provided\n" + exit 1 +fi + +PACKAGE_NAME=$(basename "$LAYER_PACKAGE" .zip) + +if [ -z "$ENVIRONMENT" ]; then + printf "[ERROR]: ENVIRONMENT not specified\n" + exit 1 +fi + +if [ "$ENVIRONMENT" = "us1-staging-fed" ]; then + AWS_VAULT_ROLE=sso-govcloud-us1-staging-fed-power-user + + export STAGE=gov-staging + + if [[ ! "$PACKAGE_NAME" =~ ^datadog_lambda_py-(signed-)?bundle-[0-9]+$ ]]; then + echo "[ERROR]: Unexpected package name: $PACKAGE_NAME" + exit 1 + fi + +elif [ $ENVIRONMENT = "us1-fed" ]; then + AWS_VAULT_ROLE=sso-govcloud-us1-fed-engineering + + export STAGE=gov-prod + + if [[ ! "$PACKAGE_NAME" =~ ^datadog_lambda_py-signed-bundle-[0-9]+$ ]]; then + echo "[ERROR]: Unexpected package name: $PACKAGE_NAME" + exit 1 + fi + +else + printf "[ERROR]: ENVIRONMENT not supported, must be us1-staging-fed or us1-fed.\n" + exit 1 +fi + +TEMP_DIR=$(mktemp -d) +unzip $LAYER_PACKAGE -d $TEMP_DIR +cp -v $TEMP_DIR/$PACKAGE_NAME/*.zip .layers/ + + +AWS_VAULT_PREFIX="aws-vault exec $AWS_VAULT_ROLE --" + +echo "Checking that you have access to the GovCloud AWS account" +$AWS_VAULT_PREFIX aws sts get-caller-identity + + +AVAILABLE_REGIONS=$($AWS_VAULT_PREFIX aws ec2 describe-regions | jq -r '.[] | .[] | .RegionName') + +# Determine the target regions +if [ -z "$REGIONS" ]; then + echo "Region not specified, running for all available regions." + REGIONS=$AVAILABLE_REGIONS +else + echo "Region specified: $REGIONS" + if [[ ! "$AVAILABLE_REGIONS" == *"$REGIONS"* ]]; then + echo "Could not find $REGIONS in available regions: $AVAILABLE_REGIONS" + echo "" + echo "EXITING SCRIPT." + exit 1 + fi +fi + +for region in $REGIONS +do + echo "Starting publishing layers for region $region..." 
+
+    export REGION=$region
+
+    for python_version in "3.8" "3.9" "3.10" "3.11" "3.12" "3.13"; do
+        for arch in "amd64" "arm64"; do
+            export PYTHON_VERSION=$python_version
+            export ARCH=$arch
+
+            export SKIP_PIP_INSTALL=true
+
+            echo "Publishing layer for $PYTHON_VERSION and $ARCH"
+
+            $AWS_VAULT_PREFIX ./ci/publish_layers.sh
+        done
+    done
+done
+
+echo "Done!"
diff --git a/scripts/publish_prod.sh b/scripts/publish_prod.sh
index f3d13653..d2918c54 100755
--- a/scripts/publish_prod.sh
+++ b/scripts/publish_prod.sh
@@ -6,6 +6,11 @@ set -e

 read -p "Are we only doing the simplified GovCloud release? ONLY IF THE NORMAL RELEASE IS DONE AND YOU HAVE DOWNLOADED THE LAYERS (y/n)? " GOVCLOUD_ONLY

+if [ $GOVCLOUD_ONLY != "n" ]; then
+    echo "GovCloud publishing is now supported only in publish_govcloud.sh"
+    exit 1
+fi
+
 # Ensure on main, and pull the latest
 BRANCH=$(git rev-parse --abbrev-ref HEAD)
 if [ $BRANCH != "main" ]; then
@@ -34,107 +39,82 @@ else
 fi

 # Ensure pypi registry access
-if [ $GOVCLOUD_ONLY == "y" ]; then
-    echo "Skipping PyPI check since this is a GovCloud-only release"
-
-else
-    read -p "Do you have access to PyPI (y/n)?" CONT
-    if [ "$CONT" != "y" ]; then
-        echo "Exiting"
-        exit 1
-    fi
+read -p "Do you have access to PyPI (y/n)?" CONT
+if [ "$CONT" != "y" ]; then
+    echo "Exiting"
+    exit 1
 fi

 CURRENT_VERSION=$(poetry version --short)
 LAYER_VERSION=$(echo $NEW_VERSION | cut -d '.' -f 2)

-if [ $GOVCLOUD_ONLY == "y" ]; then
-    echo "Skipping Libary Updates, code changes, layer builds and signing for GovCloud-only release"
+read -p "Ready to update the library version from $CURRENT_VERSION to $NEW_VERSION and publish layer version $LAYER_VERSION (y/n)?" CONT
+if [ "$CONT" != "y" ]; then
+    echo "Exiting"
+    exit 1
+fi
+echo "Answer 'n' if already done in a PR"
+read -p "Update pyproject.toml version? (y/n)?" CONT
+if [ "$CONT" != "y" ]; then
+    echo "Skipping updating pyproject.toml version"
 else
-    read -p "Ready to update the library version from $CURRENT_VERSION to $NEW_VERSION and publish layer version $LAYER_VERSION (y/n)?" CONT
-    if [ "$CONT" != "y" ]; then
-        echo "Exiting"
-        exit 1
-    fi
-
-    echo "Answer 'n' if already done in a PR"
-    read -p "Update pyproject.toml version? (y/n)?" CONT
-    if [ "$CONT" != "y" ]; then
-        echo "Skipping updating package.json version"
-    else
-        echo
-        echo "Replacing version in pyproject.toml and datadog_lambda/version.py"
-        echo
-
-        poetry version ${NEW_VERSION}
-        echo "__version__ = \"${NEW_VERSION}\"" > datadog_lambda/version.py
-    fi
     echo
-    echo "Building layers..."
-    ./scripts/build_layers.sh
-
+    echo "Replacing version in pyproject.toml and datadog_lambda/version.py"
     echo
-    echo "Signing layers for commercial AWS regions"
-    aws-vault exec sso-prod-engineering -- ./scripts/sign_layers.sh prod
-
-    echo "Answer 'n' if GitLab already did this"
-    read -p "Deploy layers to commercial AWS (y/n)?" CONT
-    if [ "$CONT" != "y" ]; then
-        echo "Skipping deployment to commercial AWS"
-    else
-        echo "Ensuring you have access to the production AWS account"
-        aws-vault exec sso-prod-engineering -- aws sts get-caller-identity
-
-        echo
-        echo "Publishing layers to commercial AWS regions"
-        VERSION=$LAYER_VERSION aws-vault exec sso-prod-engineering --no-session -- ./scripts/publish_layers.sh
-    fi
+
+    poetry version ${NEW_VERSION}
+    echo "__version__ = \"${NEW_VERSION}\"" > datadog_lambda/version.py
 fi
-read -p "Deploy layers to GovCloud AWS (y/n)?" CONT
+echo
+echo "Building layers..."
+./scripts/build_layers.sh + +echo +echo "Signing layers for commercial AWS regions" +aws-vault exec sso-prod-engineering -- ./scripts/sign_layers.sh prod + +echo "Answer 'n' if GitLab already did this" +read -p "Deploy layers to commercial AWS (y/n)?" CONT if [ "$CONT" != "y" ]; then - echo "Skipping deployment to GovCloud AWS" + echo "Skipping deployment to commercial AWS" else - echo "Ensuring you have access to the AWS GovCloud account" - aws-vault exec sso-govcloud-us1-fed-engineering -- aws sts get-caller-identity + echo "Ensuring you have access to the production AWS account" + aws-vault exec sso-prod-engineering -- aws sts get-caller-identity - echo "Publishing layers to GovCloud AWS regions" - VERSION=$LAYER_VERSION aws-vault exec sso-govcloud-us1-fed-engineering -- ./scripts/publish_layers.sh + echo + echo "Publishing layers to commercial AWS regions" + VERSION=$LAYER_VERSION aws-vault exec sso-prod-engineering --no-session -- ./scripts/publish_layers.sh fi -if [ $GOVCLOUD_ONLY == "y" ]; then - echo "Skipping PyPI check and Github Release since this is a GovCloud-only release" - +echo "Answer 'n' if GitLab already did this" +read -p "Ready to publish $NEW_VERSION to PyPI (y/n)?" CONT +if [ "$CONT" != "y" ]; then + echo "Skipping publishing to PyPI" else - echo "Answer 'n' if GitLab already did this" - read -p "Ready to publish $NEW_VERSION to PyPI (y/n)?" CONT - if [ "$CONT" != "y" ]; then - echo "Skipping publishing to PyPI" - else - echo - echo "Publishing to https://pypi.org/project/datadog-lambda/" - ./scripts/pypi.sh - fi - - - echo "Answer 'n' if you already released in GitHub" - read -p "Do you want to bump the version in GitHub? (y/n)" CONT - if [ "$CONT" != "y" ]; then - echo "Skipping publishing updates to GitHub" - else - echo - echo 'Publishing updates to github' - git commit pyproject.toml datadog_lambda/version.py -m "Bump version to ${NEW_VERSION}" - git push origin main - git tag "v$LAYER_VERSION" - git push origin "refs/tags/v$LAYER_VERSION" - fi + echo + echo "Publishing to https://pypi.org/project/datadog-lambda/" + ./scripts/pypi.sh +fi + +echo "Answer 'n' if you already released in GitHub" +read -p "Do you want to bump the version in GitHub? 
(y/n)" CONT +if [ "$CONT" != "y" ]; then + echo "Skipping publishing updates to GitHub" +else echo - echo "Now create a new release with the tag v${LAYER_VERSION} created unless you have done this already" - echo "https://github.com/DataDog/datadog-lambda-python/releases/new?tag=v$LAYER_VERSION&title=v$LAYER_VERSION" + echo 'Publishing updates to github' + git commit pyproject.toml datadog_lambda/version.py -m "Bump version to ${NEW_VERSION}" + git push origin main + git tag "v$LAYER_VERSION" + git push origin "refs/tags/v$LAYER_VERSION" fi + +echo +echo "Now create a new release with the tag v${LAYER_VERSION} created unless you have done this already" +echo "https://github.com/DataDog/datadog-lambda-python/releases/new?tag=v$LAYER_VERSION&title=v$LAYER_VERSION" + # Open a PR to the documentation repo to automatically bump layer version VERSION=$LAYER_VERSION LAYER=datadog-lambda-python ./scripts/create_documentation_pr.sh From dc1ccba4b81d80097341057bf786c25ec0efc52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 4 Mar 2025 17:06:41 -0500 Subject: [PATCH 04/26] update `CODEOWNERS` (#570) --- .github/CODEOWNERS | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e53b2646..26b4b78e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,6 @@ -* @DataDog/serverless-aws +* @DataDog/serverless-aws +datadog_lambda/tracing.py @DataDog/apm-serverless +datadog_lambda/patch.py @DataDog/apm-serverless +datadog_lambda/span_points.py @DataDog/apm-serverless +datadog_lambda/cold_start.py @DataDog/apm-serverless +datadog_lambda/wrapper.py @DataDog/apm-serverless From fc0beaae643d59a4745ae6d8da163ad39cdb5970 Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Fri, 7 Mar 2025 10:24:16 -0800 Subject: [PATCH 05/26] Pin ddtrace to Date: Mon, 10 Mar 2025 19:53:38 -0400 Subject: [PATCH 06/26] Enable LLM Observability with `agentless_enabled=True` by default with a parsed API key (#572) * enable llmobs agentless with parsed api_key * extract getting api key to its own function * lint --- datadog_lambda/api.py | 58 ++++++++++++++++++++++++--------------- datadog_lambda/wrapper.py | 9 +++++- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py index b5414fd9..a114fe8f 100644 --- a/datadog_lambda/api.py +++ b/datadog_lambda/api.py @@ -4,6 +4,7 @@ logger = logging.getLogger(__name__) KMS_ENCRYPTION_CONTEXT_KEY = "LambdaFunctionName" +api_key = None def decrypt_kms_api_key(kms_client, ciphertext): @@ -46,6 +47,40 @@ def decrypt_kms_api_key(kms_client, ciphertext): return plaintext +def get_api_key() -> str: + """ + Gets the Datadog API key from the environment variables or secrets manager. + Extracts the result to a global value to avoid repeated calls to the + secrets manager from different products. 
+    """
+    global api_key
+    if api_key:
+        return api_key
+
+    import boto3
+
+    DD_API_KEY_SECRET_ARN = os.environ.get("DD_API_KEY_SECRET_ARN", "")
+    DD_API_KEY_SSM_NAME = os.environ.get("DD_API_KEY_SSM_NAME", "")
+    DD_KMS_API_KEY = os.environ.get("DD_KMS_API_KEY", "")
+    DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", ""))
+
+    if DD_API_KEY_SECRET_ARN:
+        api_key = boto3.client("secretsmanager").get_secret_value(
+            SecretId=DD_API_KEY_SECRET_ARN
+        )["SecretString"]
+    elif DD_API_KEY_SSM_NAME:
+        api_key = boto3.client("ssm").get_parameter(
+            Name=DD_API_KEY_SSM_NAME, WithDecryption=True
+        )["Parameter"]["Value"]
+    elif DD_KMS_API_KEY:
+        kms_client = boto3.client("kms")
+        api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY)
+    else:
+        api_key = DD_API_KEY
+
+    return api_key
+
+
 def init_api():
     if not os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true":
         # Make sure that this package would always be lazy-loaded/outside from the critical path
@@ -54,28 +89,7 @@ def init_api():
         from datadog import api

         if not api._api_key:
-            import boto3
-
-            DD_API_KEY_SECRET_ARN = os.environ.get("DD_API_KEY_SECRET_ARN", "")
-            DD_API_KEY_SSM_NAME = os.environ.get("DD_API_KEY_SSM_NAME", "")
-            DD_KMS_API_KEY = os.environ.get("DD_KMS_API_KEY", "")
-            DD_API_KEY = os.environ.get(
-                "DD_API_KEY", os.environ.get("DATADOG_API_KEY", "")
-            )
-
-            if DD_API_KEY_SECRET_ARN:
-                api._api_key = boto3.client("secretsmanager").get_secret_value(
-                    SecretId=DD_API_KEY_SECRET_ARN
-                )["SecretString"]
-            elif DD_API_KEY_SSM_NAME:
-                api._api_key = boto3.client("ssm").get_parameter(
-                    Name=DD_API_KEY_SSM_NAME, WithDecryption=True
-                )["Parameter"]["Value"]
-            elif DD_KMS_API_KEY:
-                kms_client = boto3.client("kms")
-                api._api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY)
-            else:
-                api._api_key = DD_API_KEY
+            api._api_key = get_api_key()

         logger.debug("Setting DATADOG_API_KEY of length %d", len(api._api_key))

diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py
index 2632d22e..6afa9a07 100644
--- a/datadog_lambda/wrapper.py
+++ b/datadog_lambda/wrapper.py
@@ -56,10 +56,14 @@
 if profiling_env_var:
     from ddtrace.profiling import profiler

+llmobs_api_key = None
 llmobs_env_var = os.environ.get("DD_LLMOBS_ENABLED", "false").lower() in ("true", "1")
 if llmobs_env_var:
+    from datadog_lambda.api import get_api_key
     from ddtrace.llmobs import LLMObs

+    llmobs_api_key = get_api_key()
+
 logger = logging.getLogger(__name__)

 DD_FLUSH_TO_LOG = "DD_FLUSH_TO_LOG"
@@ -229,7 +233,10 @@ def __init__(self, func):

         # Enable LLM Observability
         if llmobs_env_var:
-            LLMObs.enable()
+            LLMObs.enable(
+                agentless_enabled=True,
+                api_key=llmobs_api_key,
+            )

         logger.debug("datadog_lambda_wrapper initialized")
     except Exception as e:

From 71b64fa1de4c0e56bb49cc2201a87da0d10409c5 Mon Sep 17 00:00:00 2001
From: Nicholas Hulston
Date: Wed, 12 Mar 2025 14:16:42 -0400
Subject: [PATCH 07/26] Use FIPS endpoints in GovCloud regions (#575)

---
 datadog_lambda/api.py | 31 +++++++++++++--
 tests/test_api.py | 89 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_api.py

diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py
index a114fe8f..03135912 100644
--- a/datadog_lambda/api.py
+++ b/datadog_lambda/api.py
@@ -64,16 +64,41 @@ def get_api_key() -> str:
     DD_KMS_API_KEY = os.environ.get("DD_KMS_API_KEY", "")
     DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", ""))

+    REGION = os.environ.get("AWS_REGION", "")
+    is_gov_region =
REGION.startswith("us-gov-") + if is_gov_region: + logger.debug( + "Govcloud region detected. Using FIPs endpoints for secrets management." + ) + if DD_API_KEY_SECRET_ARN: - api_key = boto3.client("secretsmanager").get_secret_value( + # Secrets manager endpoints: https://docs.aws.amazon.com/general/latest/gr/asm.html + fips_endpoint = ( + f"https://secretsmanager-fips.{REGION}.amazonaws.com" + if is_gov_region + else None + ) + secrets_manager_client = boto3.client( + "secretsmanager", endpoint_url=fips_endpoint + ) + api_key = secrets_manager_client.get_secret_value( SecretId=DD_API_KEY_SECRET_ARN )["SecretString"] elif DD_API_KEY_SSM_NAME: - api_key = boto3.client("ssm").get_parameter( + # SSM endpoints: https://docs.aws.amazon.com/general/latest/gr/ssm.html + fips_endpoint = ( + f"https://ssm-fips.{REGION}.amazonaws.com" if is_gov_region else None + ) + ssm_client = boto3.client("ssm", endpoint_url=fips_endpoint) + api_key = ssm_client.get_parameter( Name=DD_API_KEY_SSM_NAME, WithDecryption=True )["Parameter"]["Value"] elif DD_KMS_API_KEY: - kms_client = boto3.client("kms") + # KMS endpoints: https://docs.aws.amazon.com/general/latest/gr/kms.html + fips_endpoint = ( + f"https://kms-fips.{REGION}.amazonaws.com" if is_gov_region else None + ) + kms_client = boto3.client("kms", endpoint_url=fips_endpoint) api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY) else: api_key = DD_API_KEY diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 00000000..a69f4382 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,89 @@ +import os +import unittest +from unittest.mock import patch, MagicMock + +import datadog_lambda.api as api + + +class TestDatadogLambdaAPI(unittest.TestCase): + def setUp(self): + api.api_key = None + self.env_patcher = patch.dict( + os.environ, + { + "DD_API_KEY_SECRET_ARN": "", + "DD_API_KEY_SSM_NAME": "", + "DD_KMS_API_KEY": "", + "DD_API_KEY": "", + "DATADOG_API_KEY": "", + "AWS_REGION": "", + }, + clear=True, + ) + self.env_patcher.start() + + @patch("boto3.client") + def test_secrets_manager_fips_endpoint(self, mock_boto3_client): + mock_client = MagicMock() + mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} + mock_boto3_client.return_value = mock_client + + os.environ["AWS_REGION"] = "us-gov-east-1" + os.environ["DD_API_KEY_SECRET_ARN"] = "test-secrets-arn" + + api_key = api.get_api_key() + + mock_boto3_client.assert_called_with( + "secretsmanager", + endpoint_url="https://secretsmanager-fips.us-gov-east-1.amazonaws.com", + ) + self.assertEqual(api_key, "test-api-key") + + @patch("boto3.client") + def test_ssm_fips_endpoint(self, mock_boto3_client): + mock_client = MagicMock() + mock_client.get_parameter.return_value = { + "Parameter": {"Value": "test-api-key"} + } + mock_boto3_client.return_value = mock_client + + os.environ["AWS_REGION"] = "us-gov-west-1" + os.environ["DD_API_KEY_SSM_NAME"] = "test-ssm-param" + + api_key = api.get_api_key() + + mock_boto3_client.assert_called_with( + "ssm", endpoint_url="https://ssm-fips.us-gov-west-1.amazonaws.com" + ) + self.assertEqual(api_key, "test-api-key") + + @patch("boto3.client") + @patch("datadog_lambda.api.decrypt_kms_api_key") + def test_kms_fips_endpoint(self, mock_decrypt_kms, mock_boto3_client): + mock_client = MagicMock() + mock_boto3_client.return_value = mock_client + mock_decrypt_kms.return_value = "test-api-key" + + os.environ["AWS_REGION"] = "us-gov-west-1" + os.environ["DD_KMS_API_KEY"] = "encrypted-api-key" + + api_key = api.get_api_key() + + 
mock_boto3_client.assert_called_with( + "kms", endpoint_url="https://kms-fips.us-gov-west-1.amazonaws.com" + ) + self.assertEqual(api_key, "test-api-key") + + @patch("boto3.client") + def test_no_fips_for_standard_regions(self, mock_boto3_client): + mock_client = MagicMock() + mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} + mock_boto3_client.return_value = mock_client + + os.environ.clear() + os.environ["AWS_REGION"] = "us-west-2" + os.environ["DD_API_KEY_SECRET_ARN"] = "test-arn" + + api.get_api_key() + + mock_boto3_client.assert_called_with("secretsmanager", endpoint_url=None) From 7d7c15346f871d9a5d3333a8f95334b7fe3d2bf3 Mon Sep 17 00:00:00 2001 From: Nicholas Hulston Date: Fri, 14 Mar 2025 14:14:07 -0700 Subject: [PATCH 08/26] Use correct SecretsManager region (#576) --- datadog_lambda/api.py | 21 ++++++++++++++------- tests/test_api.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py index 03135912..ad860873 100644 --- a/datadog_lambda/api.py +++ b/datadog_lambda/api.py @@ -64,8 +64,8 @@ def get_api_key() -> str: DD_KMS_API_KEY = os.environ.get("DD_KMS_API_KEY", "") DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", "")) - REGION = os.environ.get("AWS_REGION", "") - is_gov_region = REGION.startswith("us-gov-") + LAMBDA_REGION = os.environ.get("AWS_REGION", "") + is_gov_region = LAMBDA_REGION.startswith("us-gov-") if is_gov_region: logger.debug( "Govcloud region detected. Using FIPs endpoints for secrets management." @@ -73,13 +73,20 @@ def get_api_key() -> str: if DD_API_KEY_SECRET_ARN: # Secrets manager endpoints: https://docs.aws.amazon.com/general/latest/gr/asm.html - fips_endpoint = ( - f"https://secretsmanager-fips.{REGION}.amazonaws.com" + try: + secrets_region = DD_API_KEY_SECRET_ARN.split(":")[3] + except Exception: + logger.debug( + "Invalid secret arn in DD_API_KEY_SECRET_ARN. Unable to get API key." 
+ ) + return "" + endpoint_url = ( + f"https://secretsmanager-fips.{secrets_region}.amazonaws.com" if is_gov_region else None ) secrets_manager_client = boto3.client( - "secretsmanager", endpoint_url=fips_endpoint + "secretsmanager", endpoint_url=endpoint_url, region_name=secrets_region ) api_key = secrets_manager_client.get_secret_value( SecretId=DD_API_KEY_SECRET_ARN @@ -87,7 +94,7 @@ def get_api_key() -> str: elif DD_API_KEY_SSM_NAME: # SSM endpoints: https://docs.aws.amazon.com/general/latest/gr/ssm.html fips_endpoint = ( - f"https://ssm-fips.{REGION}.amazonaws.com" if is_gov_region else None + f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None ) ssm_client = boto3.client("ssm", endpoint_url=fips_endpoint) api_key = ssm_client.get_parameter( @@ -96,7 +103,7 @@ def get_api_key() -> str: elif DD_KMS_API_KEY: # KMS endpoints: https://docs.aws.amazon.com/general/latest/gr/kms.html fips_endpoint = ( - f"https://kms-fips.{REGION}.amazonaws.com" if is_gov_region else None + f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None ) kms_client = boto3.client("kms", endpoint_url=fips_endpoint) api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY) diff --git a/tests/test_api.py b/tests/test_api.py index a69f4382..c7facb43 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -29,13 +29,36 @@ def test_secrets_manager_fips_endpoint(self, mock_boto3_client): mock_boto3_client.return_value = mock_client os.environ["AWS_REGION"] = "us-gov-east-1" - os.environ["DD_API_KEY_SECRET_ARN"] = "test-secrets-arn" + os.environ[ + "DD_API_KEY_SECRET_ARN" + ] = "arn:aws:secretsmanager:us-gov-east-1:1234567890:secret:key-name-123ABC" api_key = api.get_api_key() mock_boto3_client.assert_called_with( "secretsmanager", endpoint_url="https://secretsmanager-fips.us-gov-east-1.amazonaws.com", + region_name="us-gov-east-1", + ) + self.assertEqual(api_key, "test-api-key") + + @patch("boto3.client") + def test_secrets_manager_different_region(self, mock_boto3_client): + mock_client = MagicMock() + mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} + mock_boto3_client.return_value = mock_client + + os.environ["AWS_REGION"] = "us-east-1" + os.environ[ + "DD_API_KEY_SECRET_ARN" + ] = "arn:aws:secretsmanager:us-west-1:1234567890:secret:key-name-123ABC" + + api_key = api.get_api_key() + + mock_boto3_client.assert_called_with( + "secretsmanager", + endpoint_url=None, + region_name="us-west-1", ) self.assertEqual(api_key, "test-api-key") @@ -82,8 +105,12 @@ def test_no_fips_for_standard_regions(self, mock_boto3_client): os.environ.clear() os.environ["AWS_REGION"] = "us-west-2" - os.environ["DD_API_KEY_SECRET_ARN"] = "test-arn" + os.environ[ + "DD_API_KEY_SECRET_ARN" + ] = "arn:aws:secretsmanager:us-west-2:1234567890:secret:key-name-123ABC" api.get_api_key() - mock_boto3_client.assert_called_with("secretsmanager", endpoint_url=None) + mock_boto3_client.assert_called_with( + "secretsmanager", endpoint_url=None, region_name="us-west-2" + ) From 5a55fe45a49b8d7e6c8db0734f203bdff648f383 Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Mon, 17 Mar 2025 13:16:39 -0700 Subject: [PATCH 09/26] Use sam/build-python images for building layers. 
(#577) --- scripts/build_layers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_layers.sh b/scripts/build_layers.sh index a0d6ee39..23941b7a 100755 --- a/scripts/build_layers.sh +++ b/scripts/build_layers.sh @@ -61,7 +61,7 @@ function docker_build_zip { # between different python runtimes. temp_dir=$(mktemp -d) docker buildx build -t datadog-lambda-python-${arch}:$1 . --no-cache \ - --build-arg image=public.ecr.aws/docker/library/python:$1 \ + --build-arg image=public.ecr.aws/sam/build-python$1:1 \ --build-arg runtime=python$1 \ --platform linux/${arch} \ --progress=plain \ From 8398da08d0c6a6de4459b9619969bbb97b8895ee Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Tue, 18 Mar 2025 13:11:15 -0700 Subject: [PATCH 10/26] Lazy load boto client when using datadogpy for metrics. (#558) * Lazy load boto client when using datadogpy for metrics. * Update test mocking. --- datadog_lambda/api.py | 14 +++++++++----- pyproject.toml | 4 ++-- tests/test_api.py | 10 +++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py index ad860873..c539ea05 100644 --- a/datadog_lambda/api.py +++ b/datadog_lambda/api.py @@ -57,8 +57,6 @@ def get_api_key() -> str: if api_key: return api_key - import boto3 - DD_API_KEY_SECRET_ARN = os.environ.get("DD_API_KEY_SECRET_ARN", "") DD_API_KEY_SSM_NAME = os.environ.get("DD_API_KEY_SSM_NAME", "") DD_KMS_API_KEY = os.environ.get("DD_KMS_API_KEY", "") @@ -85,7 +83,7 @@ def get_api_key() -> str: if is_gov_region else None ) - secrets_manager_client = boto3.client( + secrets_manager_client = _boto3_client( "secretsmanager", endpoint_url=endpoint_url, region_name=secrets_region ) api_key = secrets_manager_client.get_secret_value( @@ -96,7 +94,7 @@ def get_api_key() -> str: fips_endpoint = ( f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None ) - ssm_client = boto3.client("ssm", endpoint_url=fips_endpoint) + ssm_client = _boto3_client("ssm", endpoint_url=fips_endpoint) api_key = ssm_client.get_parameter( Name=DD_API_KEY_SSM_NAME, WithDecryption=True )["Parameter"]["Value"] @@ -105,7 +103,7 @@ def get_api_key() -> str: fips_endpoint = ( f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None ) - kms_client = boto3.client("kms", endpoint_url=fips_endpoint) + kms_client = _boto3_client("kms", endpoint_url=fips_endpoint) api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY) else: api_key = DD_API_KEY @@ -133,3 +131,9 @@ def init_api(): # Unmute exceptions from datadog api client, so we can catch and handle them api._mute = False + + +def _boto3_client(*args, **kwargs): + import botocore.session + + return botocore.session.get_session().create_client(*args, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index a3860567..8db5e352 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ datadog = ">=0.51.0,<1.0.0" wrapt = "^1.11.2" ddtrace = ">=2.20.0,<4" ujson = ">=5.9.0" -boto3 = { version = "^1.34.0", optional = true } +botocore = { version = "^1.34.0", optional = true } requests = { version ="^2.22.0", optional = true } pytest = { version= "^8.0.0", optional = true } pytest-benchmark = { version = "^4.0", optional = true } @@ -38,7 +38,7 @@ flake8 = { version = "^5.0.4", optional = true } [tool.poetry.extras] dev = [ - "boto3", + "botocore", "flake8", "pytest", "pytest-benchmark", diff --git a/tests/test_api.py b/tests/test_api.py index c7facb43..c98d91eb 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,7 
+22,7 @@ def setUp(self): ) self.env_patcher.start() - @patch("boto3.client") + @patch("botocore.session.Session.create_client") def test_secrets_manager_fips_endpoint(self, mock_boto3_client): mock_client = MagicMock() mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} @@ -42,7 +42,7 @@ def test_secrets_manager_fips_endpoint(self, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") - @patch("boto3.client") + @patch("botocore.session.Session.create_client") def test_secrets_manager_different_region(self, mock_boto3_client): mock_client = MagicMock() mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} @@ -62,7 +62,7 @@ def test_secrets_manager_different_region(self, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") - @patch("boto3.client") + @patch("botocore.session.Session.create_client") def test_ssm_fips_endpoint(self, mock_boto3_client): mock_client = MagicMock() mock_client.get_parameter.return_value = { @@ -80,7 +80,7 @@ def test_ssm_fips_endpoint(self, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") - @patch("boto3.client") + @patch("botocore.session.Session.create_client") @patch("datadog_lambda.api.decrypt_kms_api_key") def test_kms_fips_endpoint(self, mock_decrypt_kms, mock_boto3_client): mock_client = MagicMock() @@ -97,7 +97,7 @@ def test_kms_fips_endpoint(self, mock_decrypt_kms, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") - @patch("boto3.client") + @patch("botocore.session.Session.create_client") def test_no_fips_for_standard_regions(self, mock_boto3_client): mock_client = MagicMock() mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} From 58a55bb4e4ef6fabe85580a3914c1f4d060f9378 Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Tue, 18 Mar 2025 13:11:44 -0700 Subject: [PATCH 11/26] Re-remove iast .so file. (#560) * Re-remove iast .so file. Commit https://github.com/DataDog/dd-trace-py/commit/fa18def52e13f863bd8de48cb8ef88feba0caf92 was merged to address this. * Re-remove taint tracking. --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f522e5a..0e79d884 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,10 +22,10 @@ RUN rm -rf ./python/lib/$runtime/site-packages/botocore* RUN rm -rf ./python/lib/$runtime/site-packages/setuptools RUN rm -rf ./python/lib/$runtime/site-packages/jsonschema/tests RUN find . -name 'libddwaf.so' -delete -# Comment this line out for now since ddtrace now tries to import it -# RUN rm ./python/lib/$runtime/site-packages/ddtrace/appsec/_iast/_stacktrace*.so -RUN rm ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/libdd_wrapper*.so -RUN rm ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/ddup/_ddup.*.so +RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/appsec/_iast/_taint_tracking/*.so +RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/appsec/_iast/_stacktrace*.so +RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/libdd_wrapper*.so +RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/ddup/_ddup.*.so # _stack_v2 may not exist for some versions of ddtrace (e.g. 
under python 3.13) RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/stack_v2/_stack_v2.*.so # remove *.dist-info directories except any entry_points.txt files From 96a6abd227a328d94d233d7baf951f2975f1b267 Mon Sep 17 00:00:00 2001 From: Abhinav Vedmala Date: Wed, 19 Mar 2025 09:50:53 -0400 Subject: [PATCH 12/26] Propagate Step Function Trace Context through Managed Services (#573) Allows us to extract Step Function trace context in the following cases 1. SFN -> EventBridge -> Lambda 2. SFN -> EventBridge -> SQS -> Lambda 3. SFN -> SQS -> Lambda 4. SFN -> SNS -> Lambda 5. SFN -> SNS -> SQS -> Lambda --- datadog_lambda/tracing.py | 81 ++++++--- datadog_lambda/trigger.py | 30 ++- datadog_lambda/wrapper.py | 3 - tests/test_tracing.py | 374 ++++++++++++++++++++++++-------------- tests/test_trigger.py | 66 +++++++ 5 files changed, 387 insertions(+), 167 deletions(-) diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index a73423e1..0fae76dd 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -39,6 +39,7 @@ _EventSource, parse_event_source, get_first_record, + is_step_function_event, EventTypes, EventSubtypes, ) @@ -271,6 +272,15 @@ def extract_context_from_sqs_or_sns_event_or_context(event, lambda_context): if dd_json_data: dd_data = json.loads(dd_json_data) + + if is_step_function_event(dd_data): + try: + return extract_context_from_step_functions(dd_data, None) + except Exception: + logger.debug( + "Failed to extract Step Functions context from SQS/SNS event." + ) + return propagator.extract(dd_data) else: # Handle case where trace context is injected into attributes.AWSTraceHeader @@ -313,6 +323,15 @@ def _extract_context_from_eventbridge_sqs_event(event): body = json.loads(body_str) detail = body.get("detail") dd_context = detail.get("_datadog") + + if is_step_function_event(dd_context): + try: + return extract_context_from_step_functions(dd_context, None) + except Exception: + logger.debug( + "Failed to extract Step Functions context from EventBridge to SQS event." + ) + return propagator.extract(dd_context) @@ -320,12 +339,23 @@ def extract_context_from_eventbridge_event(event, lambda_context): """ Extract datadog trace context from an EventBridge message's Details. This is only possible if Details is a JSON string. + + If we find a Step Function context, try to extract the trace context from + that header. """ try: detail = event.get("detail") dd_context = detail.get("_datadog") if not dd_context: return extract_context_from_lambda_context(lambda_context) + + try: + return extract_context_from_step_functions(dd_context, None) + except Exception: + logger.debug( + "Failed to extract Step Functions context from EventBridge event." + ) + return propagator.extract(dd_context) except Exception as e: logger.debug("The trace extractor returned with error %s", e) @@ -424,7 +454,7 @@ def _generate_sfn_trace_id(execution_id: str, part: str): def extract_context_from_step_functions(event, lambda_context): """ Only extract datadog trace context when Step Functions Context Object is injected - into lambda's event dict. + into lambda's event dict. Unwrap "Payload" if it exists to handle Legacy Lambda cases. If '_datadog' header is present, we have two cases: 1. Root is a Lambda and we use its traceID @@ -435,25 +465,25 @@ def extract_context_from_step_functions(event, lambda_context): object. 
""" try: + event = event.get("Payload", event) + event = event.get("_datadog", event) + meta = {} - dd_data = event.get("_datadog") - if dd_data and dd_data.get("serverless-version") == "v1": - if "x-datadog-trace-id" in dd_data: # lambda root - trace_id = int(dd_data.get("x-datadog-trace-id")) - high_64_bit_trace_id = _parse_high_64_bits( - dd_data.get("x-datadog-tags") - ) + if event.get("serverless-version") == "v1": + if "x-datadog-trace-id" in event: # lambda root + trace_id = int(event.get("x-datadog-trace-id")) + high_64_bit_trace_id = _parse_high_64_bits(event.get("x-datadog-tags")) if high_64_bit_trace_id: meta["_dd.p.tid"] = high_64_bit_trace_id else: # sfn root - root_execution_id = dd_data.get("RootExecutionId") + root_execution_id = event.get("RootExecutionId") trace_id = _generate_sfn_trace_id(root_execution_id, LOWER_64_BITS) meta["_dd.p.tid"] = _generate_sfn_trace_id( root_execution_id, HIGHER_64_BITS ) - parent_id = _generate_sfn_parent_id(dd_data) + parent_id = _generate_sfn_parent_id(event) else: execution_id = event.get("Execution").get("Id") trace_id = _generate_sfn_trace_id(execution_id, LOWER_64_BITS) @@ -472,20 +502,6 @@ def extract_context_from_step_functions(event, lambda_context): return extract_context_from_lambda_context(lambda_context) -def is_legacy_lambda_step_function(event): - """ - Check if the event is a step function that called a legacy lambda - """ - if not isinstance(event, dict) or "Payload" not in event: - return False - - event = event.get("Payload") - return isinstance(event, dict) and ( - "_datadog" in event - or ("Execution" in event and "StateMachine" in event and "State" in event) - ) - - def extract_context_custom_extractor(extractor, event, lambda_context): """ Extract Datadog trace context using a custom trace extractor function @@ -1309,8 +1325,18 @@ def create_inferred_span_from_eventbridge_event(event, context): synchronicity="async", tag_source="self", ) - dt_format = "%Y-%m-%dT%H:%M:%SZ" + timestamp = event.get("time") + dt_format = "%Y-%m-%dT%H:%M:%SZ" + + # Use more granular timestamp from upstream Step Function if possible + try: + if is_step_function_event(event.get("detail")): + timestamp = event["detail"]["_datadog"]["State"]["EnteredTime"] + dt_format = "%Y-%m-%dT%H:%M:%S.%fZ" + except (TypeError, KeyError, AttributeError): + logger.debug("Error parsing timestamp from Step Functions event") + dt = datetime.strptime(timestamp, dt_format) tracer.set_tags(_dd_origin) @@ -1320,6 +1346,11 @@ def create_inferred_span_from_eventbridge_event(event, context): if span: span.set_tags(tags) span.start = dt.replace(tzinfo=timezone.utc).timestamp() + + # Since inferred span will later parent Lambda, preserve Lambda's current parent + if dd_trace_context.span_id: + span.parent_id = dd_trace_context.span_id + return span diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 11759a0a..708138bf 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -146,9 +146,7 @@ def parse_event_source(event: dict) -> _EventSource: if event.get("source") == "aws.events" or has_event_categories: event_source = _EventSource(EventTypes.CLOUDWATCH_EVENTS) - if ( - "_datadog" in event and event.get("_datadog").get("serverless-version") == "v1" - ) or ("Execution" in event and "StateMachine" in event and "State" in event): + if is_step_function_event(event): event_source = _EventSource(EventTypes.STEPFUNCTIONS) event_record = get_first_record(event) @@ -369,3 +367,29 @@ def extract_http_status_code_tag(trigger_tags, response): 
status_code = response.status_code return str(status_code) + + +def is_step_function_event(event): + """ + Check if the event is a step function that invoked the current lambda. + + The whole event can be wrapped in "Payload" in Legacy Lambda cases. There may also be a + "_datadog" for JSONata style context propagation. + + The actual event must contain "Execution", "StateMachine", and "State" fields. + """ + event = event.get("Payload", event) + + # JSONPath style + if "Execution" in event and "StateMachine" in event and "State" in event: + return True + + # JSONata style + dd_context = event.get("_datadog") + return ( + dd_context + and "Execution" in dd_context + and "StateMachine" in dd_context + and "State" in dd_context + and "serverless-version" in dd_context + ) diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py index 6afa9a07..5641bd15 100644 --- a/datadog_lambda/wrapper.py +++ b/datadog_lambda/wrapper.py @@ -45,7 +45,6 @@ is_authorizer_response, tracer, propagator, - is_legacy_lambda_step_function, ) from datadog_lambda.trigger import ( extract_trigger_tags, @@ -286,8 +285,6 @@ def _before(self, event, context): self.response = None set_cold_start(init_timestamp_ns) submit_invocations_metric(context) - if is_legacy_lambda_step_function(event): - event = event["Payload"] self.trigger_tags = extract_trigger_tags(event, context) # Extract Datadog trace context and source from incoming requests dd_context, trace_context_source, event_source = extract_dd_trace_context( diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 5480a92c..0a961a62 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -36,15 +36,12 @@ _convert_xray_trace_id, _convert_xray_entity_id, _convert_xray_sampling, - InferredSpanInfo, create_service_mapping, determine_service_name, service_mapping as global_service_mapping, propagator, emit_telemetry_on_exception_outside_of_handler, - is_legacy_lambda_step_function, ) -from datadog_lambda.trigger import EventTypes from tests.utils import get_mock_context @@ -613,9 +610,39 @@ def test_with_complete_datadog_trace_headers_with_trigger_tags(self): ] ) + def _test_step_function_trace_data_common( + self, event, expected_trace_id, expected_span_id, expected_tid + ): + """Common test logic for step function trace data tests""" + lambda_ctx = get_mock_context() + expected_context = Context( + trace_id=expected_trace_id, + span_id=expected_span_id, + sampling_priority=1, + meta={"_dd.p.tid": expected_tid}, + ) + expected_headers = { + TraceHeader.TRACE_ID: str(expected_trace_id), + TraceHeader.PARENT_ID: "10713633173203262661", + TraceHeader.SAMPLING_PRIORITY: "1", + TraceHeader.TAGS: f"_dd.p.tid={expected_tid}", + } + + ctx, source, _ = extract_dd_trace_context(event, lambda_ctx) + + self.assertEqual(source, "event") + self.assertEqual(ctx, expected_context) + self.assertEqual(get_dd_trace_context(), expected_headers) + + create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) + self.mock_send_segment.assert_called_with( + XraySubsegment.TRACE_KEY, + expected_context, + ) + @with_trace_propagation_style("datadog") def test_step_function_trace_data(self): - lambda_ctx = get_mock_context() + """Test basic step function trace data extraction""" sfn_event = { "Execution": { "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316", @@ -634,79 +661,39 @@ def test_step_function_trace_data(self): "Name": "abhinav-activity-state-machine", }, } - ctx, source, 
event_source = extract_dd_trace_context(sfn_event, lambda_ctx) - self.assertEqual(source, "event") - expected_context = Context( - trace_id=435175499815315247, - span_id=3929055471293792800, - sampling_priority=1, - meta={"_dd.p.tid": "3e7a89d1b7310603"}, + self._test_step_function_trace_data_common( + sfn_event, 435175499815315247, 3929055471293792800, "3e7a89d1b7310603" ) - self.assertEqual(ctx, expected_context) - self.assertEqual( - get_dd_trace_context(), - { - TraceHeader.TRACE_ID: "435175499815315247", - TraceHeader.PARENT_ID: "10713633173203262661", - TraceHeader.SAMPLING_PRIORITY: "1", - TraceHeader.TAGS: "_dd.p.tid=3e7a89d1b7310603", + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_retry(self): + """Test step function trace data extraction with non-zero retry count""" + sfn_event = { + "Execution": { + "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316", + "Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316", + "RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j", + "StartTime": "2024-12-04T19:38:04.069Z", + "RedriveCount": 0, }, + "State": { + "Name": "Lambda Invoke", + "EnteredTime": "2024-12-04T19:38:04.118Z", + "RetryCount": 1, + }, + "StateMachine": { + "Id": "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine", + "Name": "abhinav-activity-state-machine", + }, + } + self._test_step_function_trace_data_common( + sfn_event, 435175499815315247, 5063839446130725204, "3e7a89d1b7310603" ) - create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) - self.mock_send_segment.assert_called_with( - XraySubsegment.TRACE_KEY, - expected_context, - ) - - @with_trace_propagation_style("datadog") - def test_step_function_trace_data_retry(self): - lambda_ctx = get_mock_context() - sfn_event = { - "Execution": { - "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316", - "Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316", - "RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j", - "StartTime": "2024-12-04T19:38:04.069Z", - "RedriveCount": 0, - }, - "State": { - "Name": "Lambda Invoke", - "EnteredTime": "2024-12-04T19:38:04.118Z", - "RetryCount": 1, - }, - "StateMachine": { - "Id": "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine", - "Name": "abhinav-activity-state-machine", - }, - } - ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) - self.assertEqual(source, "event") - expected_context = Context( - trace_id=435175499815315247, - span_id=5063839446130725204, - sampling_priority=1, - meta={"_dd.p.tid": "3e7a89d1b7310603"}, - ) - self.assertEqual(ctx, expected_context) - self.assertEqual( - get_dd_trace_context(), - { - TraceHeader.TRACE_ID: "435175499815315247", - TraceHeader.PARENT_ID: "10713633173203262661", - TraceHeader.SAMPLING_PRIORITY: "1", - TraceHeader.TAGS: "_dd.p.tid=3e7a89d1b7310603", - }, - ) - create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) - self.mock_send_segment.assert_called_with( - XraySubsegment.TRACE_KEY, - expected_context, - ) - # 
https://github.com/DataDog/logs-backend/blob/c17618cb552fc369ca40282bae0a65803f82f694/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L46 + # https://github.com/DataDog/logs-backend/blob/65ea567150f24e5498008f3cf8cabef9ea995f5d/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L45-L46 @with_trace_propagation_style("datadog") def test_step_function_trace_data_redrive(self): - lambda_ctx = get_mock_context() + """Test step function trace data extraction with non-zero redrive count""" sfn_event = { "Execution": { "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316", @@ -725,33 +712,13 @@ def test_step_function_trace_data_redrive(self): "Name": "abhinav-activity-state-machine", }, } - ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) - self.assertEqual(source, "event") - expected_context = Context( - trace_id=435175499815315247, - span_id=8782364156266188026, - sampling_priority=1, - meta={"_dd.p.tid": "3e7a89d1b7310603"}, - ) - self.assertEqual(ctx, expected_context) - self.assertEqual( - get_dd_trace_context(), - { - TraceHeader.TRACE_ID: "435175499815315247", - TraceHeader.PARENT_ID: "10713633173203262661", - TraceHeader.SAMPLING_PRIORITY: "1", - TraceHeader.TAGS: "_dd.p.tid=3e7a89d1b7310603", - }, - ) - create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) - self.mock_send_segment.assert_called_with( - XraySubsegment.TRACE_KEY, - expected_context, + self._test_step_function_trace_data_common( + sfn_event, 435175499815315247, 8782364156266188026, "3e7a89d1b7310603" ) @with_trace_propagation_style("datadog") def test_step_function_trace_data_lambda_root(self): - lambda_ctx = get_mock_context() + """Test JSONata style step function trace data extraction where there's an upstream Lambda""" sfn_event = { "_datadog": { "Execution": { @@ -769,33 +736,13 @@ def test_step_function_trace_data_lambda_root(self): "serverless-version": "v1", } } - ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) - self.assertEqual(source, "event") - expected_context = Context( - trace_id=5821803790426892636, - span_id=6880978411788117524, - sampling_priority=1, - meta={"_dd.p.tid": "672a7cb100000000"}, - ) - self.assertEqual(ctx, expected_context) - self.assertEqual( - get_dd_trace_context(), - { - TraceHeader.TRACE_ID: "5821803790426892636", - TraceHeader.PARENT_ID: "10713633173203262661", - TraceHeader.SAMPLING_PRIORITY: "1", - TraceHeader.TAGS: "_dd.p.tid=672a7cb100000000", - }, - ) - create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) - self.mock_send_segment.assert_called_with( - XraySubsegment.TRACE_KEY, - expected_context, + self._test_step_function_trace_data_common( + sfn_event, 5821803790426892636, 6880978411788117524, "672a7cb100000000" ) @with_trace_propagation_style("datadog") def test_step_function_trace_data_sfn_root(self): - lambda_ctx = get_mock_context() + """Test JSONata style step function trace data extraction where there's an upstream step function""" sfn_event = { "_datadog": { "Execution": { @@ -812,28 +759,183 @@ def test_step_function_trace_data_sfn_root(self): "serverless-version": "v1", } } - ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) - self.assertEqual(source, "event") - expected_context = Context( 
- trace_id=4521899030418994483, - span_id=6880978411788117524, - sampling_priority=1, - meta={"_dd.p.tid": "12d1270d99cc5e03"}, + self._test_step_function_trace_data_common( + sfn_event, 4521899030418994483, 6880978411788117524, "12d1270d99cc5e03" ) - self.assertEqual(ctx, expected_context) - self.assertEqual( - get_dd_trace_context(), - { - TraceHeader.TRACE_ID: "4521899030418994483", - TraceHeader.PARENT_ID: "10713633173203262661", - TraceHeader.SAMPLING_PRIORITY: "1", - TraceHeader.TAGS: "_dd.p.tid=12d1270d99cc5e03", + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_eventbridge(self): + """Test step function trace data extraction through EventBridge""" + eventbridge_event = { + "version": "0", + "id": "eaacd8db-02de-ab13-ed5a-8ffb84048294", + "detail-type": "StepFunctionTask", + "source": "my.eventbridge", + "account": "425362996713", + "time": "2025-03-13T15:17:34Z", + "region": "sa-east-1", + "resources": [ + "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine", + "arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:912eaa4c-291a-488a-bda3-d06bcc21203d", + ], + "detail": { + "Message": "Hello from Step Functions!", + "TaskToken": "AQCEAAAAKgAAAAMAAAAAAAAAAeMHr6sb8Ll5IKntjIiLGaBkaNeweo84kKYKDTvDaSAP1vjuYRJEGqFdHsKMyZL8ZcgAdanKpkbhPEN5hpoCe+BH9KblWeDsJxkDCk/meN5SaPlC1qS7Q/7/KqBq+tmAOCSy+MjdqFsnihy5Yo6g6C9uuPn7ccSB/609d8pznFm9nigEos/82emwi18lm67/+/bn4RTX4S7qV4RoGWUWUPeHfr34xWOipCt4SVDkoQPZdRVpq3wyRJP2zcK0zup24/opJqKKSCI5Q9orALNB2jEjDyQ9LE4mSrafoe0tcm/bOAGfrcpR3AwtArUiF6JPYd7Nw0XWWyPXFBjiQTJDhZFlGfllJ1N91eiN8wlzUX1+I0vw/t2PoEmuQ2VCJYCbl1ybjX/tQ97GZ9ogjY9N7VYy5uD5xfZ6VAyetUR06HUtbUIXTVxULm7wmsHb979W/fIQXsrxbFzc0+ypKaqGXJBoq7xX//irjpuNhWg1Wgfn0hxuXl5oN/LkqI83T8f9SdnJMxRDpaHDpttqbjVESB/Pf9o7gakjJj12+r2uiJNc81k50uhuHdFOGsImFHKV8hb1LGcq0ZzUKT5SbEDV2k+ezOP+O9Sk4c0unbpNLM3PKLKxVLhu2gtiIIVCHUHGmumW", + "_datadog": { + "Execution": { + "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:912eaa4c-291a-488a-bda3-d06bcc21203d", + "StartTime": "2025-03-13T15:17:33.972Z", + "Name": "912eaa4c-291a-488a-bda3-d06bcc21203d", + "RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j", + "RedriveCount": 0, + }, + "StateMachine": { + "Id": "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine", + "Name": "abhinav-inner-state-machine", + }, + "State": { + "Name": "EventBridge PutEvents", + "EnteredTime": "2025-03-13T15:17:34.008Z", + "RetryCount": 0, + }, + "Task": { + "Token": "AQCEAAAAKgAAAAMAAAAAAAAAAeMHr6sb8Ll5IKntjIiLGaBkaNeweo84kKYKDTvDaSAP1vjuYRJEGqFdHsKMyZL8ZcgAdanKpkbhPEN5hpoCe+BH9KblWeDsJxkDCk/meN5SaPlC1qS7Q/7/KqBq+tmAOCSy+MjdqFsnihy5Yo6g6C9uuPn7ccSB/609d8pznFm9nigEos/82emwi18lm67/+/bn4RTX4S7qV4RoGWUWUPeHfr34xWOipCt4SVDkoQPZdRVpq3wyRJP2zcK0zup24/opJqKKSCI5Q9orALNB2jEjDyQ9LE4mSrafoe0tcm/bOAGfrcpR3AwtArUiF6JPYd7Nw0XWWyPXFBjiQTJDhZFlGfllJ1N91eiN8wlzUX1+I0vw/t2PoEmuQ2VCJYCbl1ybjX/tQ97GZ9ogjY9N7VYy5uD5xfZ6VAyetUR06HUtbUIXTVxULm7wmsHb979W/fIQXsrxbFzc0+ypKaqGXJBoq7xX//irjpuNhWg1Wgfn0hxuXl5oN/LkqI83T8f9SdnJMxRDpaHDpttqbjVESB/Pf9o7gakjJj12+r2uiJNc81k50uhuHdFOGsImFHKV8hb1LGcq0ZzUKT5SbEDV2k+ezOP+O9Sk4c0unbpNLM3PKLKxVLhu2gtiIIVCHUHGmumW" + }, + "RootExecutionId": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:912eaa4c-291a-488a-bda3-d06bcc21203d", + "serverless-version": "v1", + }, }, + } + self._test_step_function_trace_data_common( + eventbridge_event, + 3401561763239692811, + 
10430178702434539423, + "a49ff3b7fb47b0b", ) - create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) - self.mock_send_segment.assert_called_with( - XraySubsegment.TRACE_KEY, - expected_context, + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_sqs(self): + """Test step function trace data extraction through SQS""" + sqs_event = { + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic:f1653ba3-2ff7-4c8e-9381-45a7a62f9708", + "Sns": { + "Type": "Notification", + "MessageId": "e39184ea-bfd8-5efa-96fe-e4a64a457ff7", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic", + "Subject": None, + "Message": "{}", + "Timestamp": "2025-03-13T15:01:49.942Z", + "SignatureVersion": "1", + "Signature": "WJHKq+pNOLgxa7+dB1dud02RM/30Jvz+KiMZzjRl38/Pphz90H24eGyIbnq3BJXYEyawFCHC6sq/5HcwXouGc5gbah6he+JpqXahMEs6cyMs2tg9SXxooRHEGv5iiZXKhnDcJYOrQ+iFExO9w+WFWfJjO2m/EDVVSYvuDjDV7mmTwAgEOD0zUvWpT7wOeKGG5Uk916Ppy3iMV7sCoHV/RwVikdhCWDDmxbdqteGduAXPdGESE/aj6kUx9ibEOKXyhC+7H1/j0tlhUchl6LZsTf1Gaiq2yEqKXKvsupcG3hRZ6FtIWP0jGlFhpW5EHc2oiHIVOsQceCYPqXYMCZvFuA==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-9c6465fa7f48f5cacd23014631ec1136.pem", + "UnsubscribeUrl": "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic:f1653ba3-2ff7-4c8e-9381-45a7a62f9708", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": '{"Execution":{"Id":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:79478846-0cff-44de-91f5-02c96ff65762","StartTime":"2025-03-13T15:01:49.738Z","Name":"79478846-0cff-44de-91f5-02c96ff65762","RoleArn":"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j","RedriveCount":0},"StateMachine":{"Id":"arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine","Name":"abhinav-inner-state-machine"},"State":{"Name":"SNS Publish","EnteredTime":"2025-03-13T15:01:49.768Z","RetryCount":0},"RootExecutionId":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:79478846-0cff-44de-91f5-02c96ff65762","serverless-version":"v1"}', + } + }, + }, + } + ] + } + self._test_step_function_trace_data_common( + sqs_event, 3818106616964044169, 15912108710769293902, "3a4fd1a254eb514a" + ) + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_eventbridge_sqs(self): + """Test step function trace data extraction through EventBridge and SQS""" + eventbridge_sqs_event = { + "Records": [ + { + "messageId": "9ed082ad-2f4d-4309-ab99-9553d2be5613", + "receiptHandle": "AQEB6z7FatNIXbWOTC4Bx+udD0flrnT7XMehruTohl8O2KI2t9hvo5oxGIOhwcb+QtS5aRXsFE35TgGE8kZHlHK7Sa8jQUen6XmsPG7qB6BPdXjr0eunM2SDAtLj0mDSKx907VIKRYQG+qpI9ZyNK7Bi786oQIz2UkZGZru9zlXxJtAQiXBqfJ+OfTzhIwkPu04czU6lYfAbxdyNaBNdBEsTNJKPjquvcq1ZBVCHkn9L6wo8jha6XreoeS2WJ5N26ZLKtAl3wlSUByB92OKZU2mEuNboyY7bgK+nkx4N8fVVrafVXnY9YHuq60eQcZ/nusWFeJlVyN7NFypYP2IOn25xylltEACKbgUdEsFU2h5k7yI2DVk5eAt9vB6qmAJlgfkGsXG0SZrCADoIKXl9jpwajw==", + "body": 
'{"version":"0","id":"ff6d828b-b35e-abdf-64b6-6ea2cf698c0b","detail-type":"StepFunctionTask","source":"my.eventbridge","account":"425362996713","time":"2025-03-13T15:14:21Z","region":"sa-east-1","resources":["arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine","arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:fe087266-fe48-4a31-a21b-691f4e7ea985"],"detail":{"Message":"Hello from Step Functions!","TaskToken":"AQCEAAAAKgAAAAMAAAAAAAAAAfi3HMLTw3u9h0vSmkjyHlK1tv5bQUyA7i+6LIvrBWu+3S+DMuQ79JpMtAuCaMN/AGSuGPO7OPeTNA/9v7/kzAsLoPzwPhbrDPXP4SVF1YIO663PvtX/tEWxnAfwLqwDyx8G8VEsVLcmiiOafFCKJwn0OP/DoAWc0sjhWwRxIoQ0ipBGhOqU8rO8SFZVvxUbkosNejnhT7B6314pC89JZLpXU7SxFe+XrgN+uRAvFxsH/+RwDf94xk5hhtukH7HzhJKWN2WCtUISd84pM/1V7ppDuJ3FHgJT22xQIbEGA9Q4o+pLLehzE2SHCdo7eWYQqN+7BanxBNMI6kBMaf5nuh9izAp38lsrmHJyO8NvXgWg+F9hoTZX4RpV9CCwvRFrCRcCeDq4/uJzbvB4AwwA2q2Llm0X8yH0pKvPZ2v7pl4nCWdnEgj920I8AmBCuozbKP7gJRnAqfx3MnOSkpZTeGnHkp0ly8EevwCT2zX/1GQnCAx02kBaDJgUMputFeruMBzwVtlEVBFUUgaWbJwHzz2htuAw282pdATrKfv4VV1N962uLBJ32wd9a92rX7VXXToitvZGIvf/Z7cu4xfAzxQH1rIQ3M4ojkR9r48qoYtnYDlEf+BkIL8L4+xpbRFSBk3p","_datadog":{"Execution":{"Id":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:fe087266-fe48-4a31-a21b-691f4e7ea985","StartTime":"2025-03-13T15:14:21.730Z","Name":"fe087266-fe48-4a31-a21b-691f4e7ea985","RoleArn":"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j","RedriveCount":0},"StateMachine":{"Id":"arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine","Name":"abhinav-inner-state-machine"},"State":{"Name":"EventBridge PutEvents","EnteredTime":"2025-03-13T15:14:21.765Z","RetryCount":0},"Task":{"Token":"AQCEAAAAKgAAAAMAAAAAAAAAAfi3HMLTw3u9h0vSmkjyHlK1tv5bQUyA7i+6LIvrBWu+3S+DMuQ79JpMtAuCaMN/AGSuGPO7OPeTNA/9v7/kzAsLoPzwPhbrDPXP4SVF1YIO663PvtX/tEWxnAfwLqwDyx8G8VEsVLcmiiOafFCKJwn0OP/DoAWc0sjhWwRxIoQ0ipBGhOqU8rO8SFZVvxUbkosNejnhT7B6314pC89JZLpXU7SxFe+XrgN+uRAvFxsH/+RwDf94xk5hhtukH7HzhJKWN2WCtUISd84pM/1V7ppDuJ3FHgJT22xQIbEGA9Q4o+pLLehzE2SHCdo7eWYQqN+7BanxBNMI6kBMaf5nuh9izAp38lsrmHJyO8NvXgWg+F9hoTZX4RpV9CCwvRFrCRcCeDq4/uJzbvB4AwwA2q2Llm0X8yH0pKvPZ2v7pl4nCWdnEgj920I8AmBCuozbKP7gJRnAqfx3MnOSkpZTeGnHkp0ly8EevwCT2zX/1GQnCAx02kBaDJgUMputFeruMBzwVtlEVBFUUgaWbJwHzz2htuAw282pdATrKfv4VV1N962uLBJ32wd9a92rX7VXXToitvZGIvf/Z7cu4xfAzxQH1rIQ3M4ojkR9r48qoYtnYDlEf+BkIL8L4+xpbRFSBk3p"},"RootExecutionId":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:fe087266-fe48-4a31-a21b-691f4e7ea985","serverless-version":"v1"}}}', + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1741878862068", + "SenderId": "AROAWGCM4HXUUNHLDXVER:6145b5ba998f311c8ac27f5cade2b915", + "ApproximateFirstReceiveTimestamp": "1741878862075", + }, + "messageAttributes": {}, + "md5OfBody": "e5cf8197b304a4dd4fd5db8e4842484b", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:sa-east-1:425362996713:abhinav-q", + "awsRegion": "sa-east-1", + } + ] + } + self._test_step_function_trace_data_common( + eventbridge_sqs_event, + 6527209323865742984, + 14276854885394865473, + "2ee7d9862d048173", + ) + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_sns(self): + """Test step function trace data extraction through SNS""" + sns_event = { + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic:f1653ba3-2ff7-4c8e-9381-45a7a62f9708", + "Sns": { + 
"Type": "Notification", + "MessageId": "7bc0c17d-bf88-5ff4-af7f-a131463a0d90", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic", + "Subject": None, + "Message": "{}", + "Timestamp": "2025-03-13T15:19:14.245Z", + "SignatureVersion": "1", + "Signature": "r8RoYzq4uNcq0yj7sxcp8sTbFiDk8zqtocG7mJuE2MPVuR8O5eNg2ohofokUnC84xADlCq5k6ElP55lbbY36tQO+qDGdV6+TGN4bAL9FiQrDE6tQYYJdlv/sYE7iOOgnRBC9ljEdCIDNtQNGCfND/8JzatPg8KAy7xMRcLrGWu4xIMEysqNTz7rETfhdZjLQPssAht44KcoUJCH4/VuB+B9W1RhwA+M8Q3tqxzahIXzcgDM8OlmfkBlXo4FDVF3WUzjXLf9AMOg+66GupjQFtUpmRMkA8KXSV1HCso7e6nIIWtOnUoWeDDUfQPFFq4TNSlb6h2NuebaHdnW5nhxnJQ==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-9c6465fa7f48f5cacd23014631ec1136.pem", + "UnsubscribeUrl": "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic:f1653ba3-2ff7-4c8e-9381-45a7a62f9708", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": '{"Execution":{"Id":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:11623e4f-70ee-4330-8fbe-955152dea54c","StartTime":"2025-03-13T15:19:14.019Z","Name":"11623e4f-70ee-4330-8fbe-955152dea54c","RoleArn":"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j","RedriveCount":0},"StateMachine":{"Id":"arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine","Name":"abhinav-inner-state-machine"},"State":{"Name":"SNS Publish","EnteredTime":"2025-03-13T15:19:14.061Z","RetryCount":0},"RootExecutionId":"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:11623e4f-70ee-4330-8fbe-955152dea54c","serverless-version":"v1"}', + } + }, + }, + } + ] + } + self._test_step_function_trace_data_common( + sns_event, 1459500239678510857, 13193042003602978730, "fafc98885fd4647" + ) + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_sns_sqs(self): + """Test step function trace data extraction through SNS and SQS""" + sns_sqs_event = { + "Records": [ + { + "messageId": "9ec3339f-cd1a-43ba-9681-3e9113b430d3", + "receiptHandle": "AQEBJ5gIvqEWQt39NHPMAoK57cGgKtrgTtckWeWdDRi2FeucYr6pBhNjzXuUrmoHZMozX1WaoABtfQ5+kX5ucDBpA2Ci3Q07Z4MYvA6X0Sw13HCkiBnLrHPmH/F3rUBjvdRkIIKqA2ACX58MdkaYGNpqsHTJHB613wa8z4zurK0u7eUIXrr+e+gtsuPD39hiWlJo7cpBVv7y178rzMX8gPQTnRJv1cjhCHENtjWTSmfFC5N+BIQNIcjFsTTDRSovZlNIfAEuS+uowgzk0DUyoTJD5nFTL8lQHeXGRCUQe58/UY9OwRXEFVPGZOQR4OI9Wa4Kf/keFypTk9YwC9DhSeKvzZ0wBvejyl1n0ztT45+XYoWfi0mxGWM5b7r9wT36RDmjnM6vszH/d3fhZSRPASxWBQ==", + "body": '{\n "Type" : "Notification",\n "MessageId" : "1f3078d0-c792-5cf3-a130-189c3b846a3f",\n "TopicArn" : "arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic",\n "Message" : "{}",\n "Timestamp" : "2025-03-13T15:29:26.348Z",\n "SignatureVersion" : "1",\n "Signature" : "mxOqAQ5o/isJrMS0PezHKRaA3g8Z/8YDbkToqhJub6I66LGtl+NYhyfTyllbgxvRP2XD2meKPRSgPI3nLyq8UHsWgyYwe3Tsv8QpRunCVE9Pebh+V1LGPWfjOiL0e+bnaj956QJD99560LJ6bzWP9QO584/zfOdcw6E5XQZfAI+pvEsf28Dy0WJO/lWTATRZDf8wGhmc7uKI1ZMsrOaNoUD8PXVqsI4yrJHxhzMb3SrC7YjI/PnNIbcn6ezwprbUdbZvyNAfJiE0k5IlppA089tMXC/ItgC7AgQhG9huPdKi5KdWGACK7gEwqmFwL+5T33sUXDaH2g58WhCs76pKEw==",\n "SigningCertURL" : "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-9c6465fa7f48f5cacd23014631ec1136.pem",\n "UnsubscribeURL" : "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:logs-to-traces-dev-topic:5f64545d-ae9a-4a5f-a7ee-798a0bd8519e",\n 
"MessageAttributes" : {\n "_datadog" : {"Type":"String","Value":"{\\"Execution\\":{\\"Id\\":\\"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:37ff72b8-0ee0-49e2-93c0-8a1764206a03\\",\\"StartTime\\":\\"2025-03-13T15:29:26.144Z\\",\\"Name\\":\\"37ff72b8-0ee0-49e2-93c0-8a1764206a03\\",\\"RoleArn\\":\\"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j\\",\\"RedriveCount\\":0},\\"StateMachine\\":{\\"Id\\":\\"arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-inner-state-machine\\",\\"Name\\":\\"abhinav-inner-state-machine\\"},\\"State\\":{\\"Name\\":\\"SNS Publish\\",\\"EnteredTime\\":\\"2025-03-13T15:29:26.182Z\\",\\"RetryCount\\":0},\\"RootExecutionId\\":\\"arn:aws:states:sa-east-1:425362996713:execution:abhinav-inner-state-machine:37ff72b8-0ee0-49e2-93c0-8a1764206a03\\",\\"serverless-version\\":\\"v1\\"}"}\n }\n}', + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1741879766424", + "SenderId": "AIDAIOA2GYWSHW4E2VXIO", + "ApproximateFirstReceiveTimestamp": "1741879766432", + }, + "messageAttributes": {}, + "md5OfBody": "52af59de28507d7e67324b46c95337d8", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:sa-east-1:425362996713:abhinav-q", + "awsRegion": "sa-east-1", + } + ] + } + self._test_step_function_trace_data_common( + sns_sqs_event, 5708348677301000120, 18223515719478572006, "45457f5f3fde3fa1" ) diff --git a/tests/test_trigger.py b/tests/test_trigger.py index be028a23..9cb088f1 100644 --- a/tests/test_trigger.py +++ b/tests/test_trigger.py @@ -9,6 +9,7 @@ get_event_source_arn, extract_trigger_tags, extract_http_status_code_tag, + is_step_function_event, ) from tests.utils import get_mock_context @@ -543,3 +544,68 @@ def test_extract_http_status_code_tag_from_response_object(self): response.status_code = 403 status_code = extract_http_status_code_tag(trigger_tags, response) self.assertEqual(status_code, "403") + + +class IsStepFunctionEvent(unittest.TestCase): + def test_is_step_function_event_jsonata(self): + event = { + "_datadog": { + "Execution": { + "Id": "665c417c-1237-4742-aaca-8b3becbb9e75", + "RedriveCount": 0, + }, + "StateMachine": {}, + "State": { + "Name": "my-awesome-state", + "EnteredTime": "Mon Nov 13 12:43:33 PST 2023", + "RetryCount": 0, + }, + "x-datadog-trace-id": "5821803790426892636", + "x-datadog-tags": "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000", + "serverless-version": "v1", + } + } + self.assertTrue(is_step_function_event(event)) + + def test_is_step_function_event_jsonpath(self): + event = { + "Execution": { + "Id": "665c417c-1237-4742-aaca-8b3becbb9e75", + "RedriveCount": 0, + }, + "StateMachine": {}, + "State": { + "Name": "my-awesome-state", + "EnteredTime": "Mon Nov 13 12:43:33 PST 2023", + "RetryCount": 0, + }, + } + self.assertTrue(is_step_function_event(event)) + + def test_is_step_function_event_legacy_lambda(self): + event = { + "Payload": { + "Execution": { + "Id": "665c417c-1237-4742-aaca-8b3becbb9e75", + "RedriveCount": 0, + }, + "StateMachine": {}, + "State": { + "Name": "my-awesome-state", + "EnteredTime": "Mon Nov 13 12:43:33 PST 2023", + "RetryCount": 0, + }, + } + } + self.assertTrue(is_step_function_event(event)) + + def test_is_step_function_event_dd_header(self): + event = { + "_datadog": { + "x-datadog-trace-id": "5821803790426892636", + "x-datadog-parent-id": "5821803790426892636", + "x-datadog-tags": "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000", + "x-datadog-sampling-priority": "1", + } + } + 
self.assertFalse(is_step_function_event(event)) From 92ec3b8e36b688ae972b1fe23c9737a6488668fa Mon Sep 17 00:00:00 2001 From: Nicholas Hulston Date: Wed, 19 Mar 2025 07:43:20 -0700 Subject: [PATCH 13/26] Submit metric for DynamoDB Stream Type (#578) --- datadog_lambda/metric.py | 14 ++++++++++++++ datadog_lambda/span_pointers.py | 4 ++++ .../snapshots/logs/async-metrics_python310.log | 9 +++++++++ .../snapshots/logs/async-metrics_python311.log | 9 +++++++++ .../snapshots/logs/async-metrics_python312.log | 9 +++++++++ .../snapshots/logs/async-metrics_python313.log | 9 +++++++++ .../snapshots/logs/async-metrics_python38.log | 9 +++++++++ .../snapshots/logs/async-metrics_python39.log | 9 +++++++++ .../snapshots/logs/sync-metrics_python310.log | 9 +++++++++ .../snapshots/logs/sync-metrics_python311.log | 9 +++++++++ .../snapshots/logs/sync-metrics_python312.log | 11 ++++++++++- .../snapshots/logs/sync-metrics_python313.log | 11 ++++++++++- .../snapshots/logs/sync-metrics_python38.log | 9 +++++++++ .../snapshots/logs/sync-metrics_python39.log | 11 ++++++++++- 14 files changed, 129 insertions(+), 3 deletions(-) diff --git a/datadog_lambda/metric.py b/datadog_lambda/metric.py index 6389c268..f9c67a26 100644 --- a/datadog_lambda/metric.py +++ b/datadog_lambda/metric.py @@ -188,3 +188,17 @@ def submit_errors_metric(lambda_context): lambda_context (object): Lambda context dict passed to the function by AWS """ submit_enhanced_metric("errors", lambda_context) + + +def submit_dynamodb_stream_type_metric(event): + stream_view_type = ( + event.get("Records", [{}])[0].get("dynamodb", {}).get("StreamViewType") + ) + if stream_view_type: + lambda_metric( + "datadog.serverless.dynamodb.stream.type", + 1, + timestamp=None, + tags=[f"streamtype:{stream_view_type}"], + force_async=True, + ) diff --git a/datadog_lambda/span_pointers.py b/datadog_lambda/span_pointers.py index e111469e..40d959e6 100644 --- a/datadog_lambda/span_pointers.py +++ b/datadog_lambda/span_pointers.py @@ -6,6 +6,8 @@ from ddtrace._trace._span_pointer import _SpanPointerDirection from ddtrace._trace._span_pointer import _SpanPointerDescription + +from datadog_lambda.metric import submit_dynamodb_stream_type_metric from datadog_lambda.trigger import EventTypes @@ -28,6 +30,8 @@ def calculate_span_pointers( return _calculate_s3_span_pointers_for_event(event) elif event_source.equals(EventTypes.DYNAMODB): + # Temporary metric. 
TODO eventually remove(@nhulston) + submit_dynamodb_stream_type_metric(event) return _calculate_dynamodb_span_pointers_for_event(event) except Exception as e: diff --git a/tests/integration/snapshots/logs/async-metrics_python310.log b/tests/integration/snapshots/logs/async-metrics_python310.log index ed0d3b43..24d3fb5b 100644 --- a/tests/integration/snapshots/logs/async-metrics_python310.log +++ b/tests/integration/snapshots/logs/async-metrics_python310.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python310_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python310_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/async-metrics_python311.log b/tests/integration/snapshots/logs/async-metrics_python311.log index b57a1b5a..e4fa66bc 100644 --- a/tests/integration/snapshots/logs/async-metrics_python311.log +++ b/tests/integration/snapshots/logs/async-metrics_python311.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python311_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python311_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/async-metrics_python312.log b/tests/integration/snapshots/logs/async-metrics_python312.log index 1b7e4b08..0d632c6c 100644 --- a/tests/integration/snapshots/logs/async-metrics_python312.log +++ b/tests/integration/snapshots/logs/async-metrics_python312.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python312_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python312_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/async-metrics_python313.log b/tests/integration/snapshots/logs/async-metrics_python313.log index 32342559..09070709 100644 --- a/tests/integration/snapshots/logs/async-metrics_python313.log +++ b/tests/integration/snapshots/logs/async-metrics_python313.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python313_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python313_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/async-metrics_python38.log b/tests/integration/snapshots/logs/async-metrics_python38.log index 9dc9edf6..4a506930 100644 --- a/tests/integration/snapshots/logs/async-metrics_python38.log +++ b/tests/integration/snapshots/logs/async-metrics_python38.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python38_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python38_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/async-metrics_python39.log b/tests/integration/snapshots/logs/async-metrics_python39.log index 89e5d227..54081402 100644 --- a/tests/integration/snapshots/logs/async-metrics_python39.log +++ b/tests/integration/snapshots/logs/async-metrics_python39.log @@ -188,6 +188,15 @@ START "dd_lambda_layer:datadog-python39_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + 
"dd_lambda_layer:datadog-python39_X.X.X" + ] +} { "m": "hello.dog", "v": 1, diff --git a/tests/integration/snapshots/logs/sync-metrics_python310.log b/tests/integration/snapshots/logs/sync-metrics_python310.log index 6eab44c2..e2569775 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python310.log +++ b/tests/integration/snapshots/logs/sync-metrics_python310.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python310_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python310_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { diff --git a/tests/integration/snapshots/logs/sync-metrics_python311.log b/tests/integration/snapshots/logs/sync-metrics_python311.log index 87cd6a6a..69d4a695 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python311.log +++ b/tests/integration/snapshots/logs/sync-metrics_python311.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python311_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python311_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { diff --git a/tests/integration/snapshots/logs/sync-metrics_python312.log b/tests/integration/snapshots/logs/sync-metrics_python312.log index 41b5a71c..49bae0a2 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python312.log +++ b/tests/integration/snapshots/logs/sync-metrics_python312.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python312_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python312_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { @@ -546,7 +555,6 @@ HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","A ] } HTTP POST https://api.datadoghq.com/api/v1/distribution_points Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","Content-Encoding:deflate","Content-Length:XXXX","Content-Type:application/json","DD-API-KEY:XXXX","User-Agent:datadogpy/XX (python XX; os linux; arch XXXX)","traceparent:XXX","tracestate:XXX -END Duration: XXXX ms Memory Used: XXXX MB { "traces": [ [ @@ -585,6 +593,7 @@ END Duration: XXXX ms Memory Used: XXXX MB ] ] } +END Duration: XXXX ms Memory Used: XXXX MB START { "m": "aws.lambda.enhanced.invocations", diff --git a/tests/integration/snapshots/logs/sync-metrics_python313.log 
b/tests/integration/snapshots/logs/sync-metrics_python313.log index 439e44d6..2f461f6f 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python313.log +++ b/tests/integration/snapshots/logs/sync-metrics_python313.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python313_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python313_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { @@ -1302,7 +1311,6 @@ HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","A ] } HTTP POST https://api.datadoghq.com/api/v1/distribution_points Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","Content-Encoding:deflate","Content-Length:XXXX","Content-Type:application/json","DD-API-KEY:XXXX","User-Agent:datadogpy/XX (python XX; os linux; arch XXXX)","traceparent:XXX","tracestate:XXX -END Duration: XXXX ms Memory Used: XXXX MB { "traces": [ [ @@ -1341,6 +1349,7 @@ END Duration: XXXX ms Memory Used: XXXX MB ] ] } +END Duration: XXXX ms Memory Used: XXXX MB START { "m": "aws.lambda.enhanced.invocations", diff --git a/tests/integration/snapshots/logs/sync-metrics_python38.log b/tests/integration/snapshots/logs/sync-metrics_python38.log index b30289ca..83e33d33 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python38.log +++ b/tests/integration/snapshots/logs/sync-metrics_python38.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python38_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python38_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { diff --git a/tests/integration/snapshots/logs/sync-metrics_python39.log b/tests/integration/snapshots/logs/sync-metrics_python39.log index 772ea5d1..0a433c34 100644 --- a/tests/integration/snapshots/logs/sync-metrics_python39.log +++ b/tests/integration/snapshots/logs/sync-metrics_python39.log @@ -207,6 +207,15 @@ START "dd_lambda_layer:datadog-python39_X.X.X" ] } +{ + "m": "datadog.serverless.dynamodb.stream.type", + "v": 1, + "e": XXXX, + "t": [ + "streamtype:NEW_AND_OLD_IMAGES", + "dd_lambda_layer:datadog-python39_X.X.X" + ] +} HTTP GET https://datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","Accept:*/*","Connection:keep-alive","User-Agent:python-requests/X.X.X","traceparent:XXX","tracestate:XXX { @@ -368,6 +377,7 @@ HTTP GET https://www.datadoghq.com/ Headers: ["Accept-Encoding:gzip, deflate","A ] } HTTP POST https://api.datadoghq.com/api/v1/distribution_points Headers: ["Accept-Encoding:gzip, 
deflate","Accept:*/*","Connection:keep-alive","Content-Encoding:deflate","Content-Length:XXXX","Content-Type:application/json","DD-API-KEY:XXXX","User-Agent:datadogpy/XX (python XX; os linux; arch XXXX)","traceparent:XXX","tracestate:XXX +END Duration: XXXX ms Memory Used: XXXX MB { "traces": [ [ @@ -406,7 +416,6 @@ HTTP POST https://api.datadoghq.com/api/v1/distribution_points Headers: ["Accept ] ] } -END Duration: XXXX ms Memory Used: XXXX MB START { "m": "aws.lambda.enhanced.invocations", From 71c14e370e9f2a086ff676705a2d19b045b15f20 Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Mon, 24 Mar 2025 08:38:10 -0700 Subject: [PATCH 14/26] Remove __future__ import. (#579) --- datadog_lambda/handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog_lambda/handler.py b/datadog_lambda/handler.py index 433d9b92..4f12b1ad 100644 --- a/datadog_lambda/handler.py +++ b/datadog_lambda/handler.py @@ -3,7 +3,6 @@ # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2020 Datadog, Inc. -from __future__ import absolute_import from importlib import import_module import os From 95f9aed2b884f6205eb3e346ef6498bc0ae6aa4e Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Thu, 27 Mar 2025 15:28:52 -0400 Subject: [PATCH 15/26] Revert "Use sam/build-python images for building layers. (#577)" (#580) This reverts commit 5a55fe45a49b8d7e6c8db0734f203bdff648f383. --- scripts/build_layers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_layers.sh b/scripts/build_layers.sh index 23941b7a..a0d6ee39 100755 --- a/scripts/build_layers.sh +++ b/scripts/build_layers.sh @@ -61,7 +61,7 @@ function docker_build_zip { # between different python runtimes. temp_dir=$(mktemp -d) docker buildx build -t datadog-lambda-python-${arch}:$1 . --no-cache \ - --build-arg image=public.ecr.aws/sam/build-python$1:1 \ + --build-arg image=public.ecr.aws/docker/library/python:$1 \ --build-arg runtime=python$1 \ --platform linux/${arch} \ --progress=plain \ From 2320c0a909413c8b66ba501fc969c043fbd27ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 28 Mar 2025 16:28:25 -0400 Subject: [PATCH 16/26] chore: lazy load some imports (#581) * lazy load metrics also hashlib right away * black * update invocation metric to be error metric * lazyload `base64` * patch right call --- datadog_lambda/api.py | 2 +- datadog_lambda/tracing.py | 10 ++++++++-- datadog_lambda/trigger.py | 3 ++- datadog_lambda/wrapper.py | 24 ++++++++++++++++-------- tests/test_wrapper.py | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py index c539ea05..fd3e2c17 100644 --- a/datadog_lambda/api.py +++ b/datadog_lambda/api.py @@ -1,6 +1,5 @@ import os import logging -import base64 logger = logging.getLogger(__name__) KMS_ENCRYPTION_CONTEXT_KEY = "LambdaFunctionName" @@ -9,6 +8,7 @@ def decrypt_kms_api_key(kms_client, ciphertext): from botocore.exceptions import ClientError + import base64 """ Decodes and deciphers the base64-encoded ciphertext given as a parameter using KMS. diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 0fae76dd..9a27673c 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -2,10 +2,8 @@ # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. 
-import hashlib import logging import os -import base64 import traceback import ujson as json from datetime import datetime, timezone @@ -259,6 +257,8 @@ def extract_context_from_sqs_or_sns_event_or_context(event, lambda_context): dd_json_data = None dd_json_data_type = dd_payload.get("Type") or dd_payload.get("dataType") if dd_json_data_type == "Binary": + import base64 + dd_json_data = dd_payload.get("binaryValue") or dd_payload.get("Value") if dd_json_data: dd_json_data = base64.b64decode(dd_json_data) @@ -373,6 +373,8 @@ def extract_context_from_kinesis_event(event, lambda_context): return extract_context_from_lambda_context(lambda_context) data = kinesis.get("data") if data: + import base64 + b64_bytes = data.encode("ascii") str_bytes = base64.b64decode(b64_bytes) data_str = str_bytes.decode("ascii") @@ -387,6 +389,8 @@ def extract_context_from_kinesis_event(event, lambda_context): def _deterministic_sha256_hash(s: str, part: str) -> int: + import hashlib + sha256_hash = hashlib.sha256(s.encode()).hexdigest() # First two chars is '0b'. zfill to ensure 256 bits, but we only care about the first 128 bits binary_hash = bin(int(sha256_hash, 16))[2:].zfill(256) @@ -551,6 +555,8 @@ def get_injected_authorizer_data(event, is_http_api) -> dict: if not dd_data_raw: return None + import base64 + injected_data = json.loads(base64.b64decode(dd_data_raw)) # Lambda authorizer's results can be cached. But the payload will still have the injected diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 708138bf..8090e36e 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -3,7 +3,6 @@ # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. -import base64 import gzip import ujson as json from io import BytesIO, BufferedReader @@ -242,6 +241,8 @@ def parse_event_source_arn(source: _EventSource, event: dict, context: Any) -> s # e.g. arn:aws:logs:us-west-1:123456789012:log-group:/my-log-group-xyz if source.event_type == EventTypes.CLOUDWATCH_LOGS: + import base64 + with gzip.GzipFile( fileobj=BytesIO(base64.b64decode(event.get("awslogs", {}).get("data"))) ) as decompress_stream: diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py index 5641bd15..8c1914e3 100644 --- a/datadog_lambda/wrapper.py +++ b/datadog_lambda/wrapper.py @@ -2,7 +2,6 @@ # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. 
-import base64 import os import logging import traceback @@ -23,11 +22,6 @@ XraySubsegment, Headers, ) -from datadog_lambda.metric import ( - flush_stats, - submit_invocations_metric, - submit_errors_metric, -) from datadog_lambda.module_name import modify_module_name from datadog_lambda.patch import patch_all from datadog_lambda.span_pointers import calculate_span_pointers @@ -248,7 +242,11 @@ def __call__(self, event, context, **kwargs): self.response = self.func(event, context, **kwargs) return self.response except Exception: - submit_errors_metric(context) + if not should_use_extension: + from datadog_lambda.metric import submit_errors_metric + + submit_errors_metric(context) + if self.span: self.span.set_traceback() raise @@ -274,6 +272,9 @@ def _inject_authorizer_span_headers(self, request_id): injected_headers[Headers.Parent_Span_Finish_Time] = finish_time_ns if request_id is not None: injected_headers[Headers.Authorizing_Request_Id] = request_id + + import base64 + datadog_data = base64.b64encode( json.dumps(injected_headers, escape_forward_slashes=False).encode() ).decode() @@ -284,7 +285,12 @@ def _before(self, event, context): try: self.response = None set_cold_start(init_timestamp_ns) - submit_invocations_metric(context) + + if not should_use_extension: + from datadog_lambda.metric import submit_invocations_metric + + submit_invocations_metric(context) + self.trigger_tags = extract_trigger_tags(event, context) # Extract Datadog trace context and source from incoming requests dd_context, trace_context_source, event_source = extract_dd_trace_context( @@ -383,6 +389,8 @@ def _after(self, event, context): logger.debug("Failed to create cold start spans. %s", e) if not self.flush_to_log or should_use_extension: + from datadog_lambda.metric import flush_stats + flush_stats(context) if should_use_extension and self.local_testing_mode: # when testing locally, the extension does not know when an diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index f47285e6..4b243036 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -470,7 +470,7 @@ def lambda_handler(event, context): self.mock_write_metric_point_to_stdout.assert_not_called() def test_only_one_wrapper_in_use(self): - patcher = patch("datadog_lambda.wrapper.submit_invocations_metric") + patcher = patch("datadog_lambda.metric.submit_invocations_metric") self.mock_submit_invocations_metric = patcher.start() self.addCleanup(patcher.stop) From 7cd2baf6361d0a02b8b48e084fbc828a7db66b00 Mon Sep 17 00:00:00 2001 From: Joey Zhao <5253430+joeyzhao2018@users.noreply.github.com> Date: Fri, 4 Apr 2025 10:49:52 -0400 Subject: [PATCH 17/26] v6.107.0 (#583) --- datadog_lambda/version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog_lambda/version.py b/datadog_lambda/version.py index 0c8d879b..702691d8 100644 --- a/datadog_lambda/version.py +++ b/datadog_lambda/version.py @@ -1 +1 @@ -__version__ = "6.106.0" +__version__ = "6.107.0" diff --git a/pyproject.toml b/pyproject.toml index 8db5e352..165a8cbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datadog_lambda" -version = "6.106.0" +version = "6.107.0" description = "The Datadog AWS Lambda Library" authors = ["Datadog, Inc. 
"] license = "Apache-2.0" From 8207aa224692c1f3eb72cc5cb8dc10251e5368d2 Mon Sep 17 00:00:00 2001 From: Sam Brenner <106700075+sabrenner@users.noreply.github.com> Date: Wed, 16 Apr 2025 13:59:37 -0400 Subject: [PATCH 18/26] revert llmobs api key and forced agentless (#585) --- datadog_lambda/wrapper.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py index 8c1914e3..e81b1baa 100644 --- a/datadog_lambda/wrapper.py +++ b/datadog_lambda/wrapper.py @@ -49,14 +49,10 @@ if profiling_env_var: from ddtrace.profiling import profiler -llmobs_api_key = None llmobs_env_var = os.environ.get("DD_LLMOBS_ENABLED", "false").lower() in ("true", "1") if llmobs_env_var: - from datadog_lambda.api import get_api_key from ddtrace.llmobs import LLMObs - llmobs_api_key = get_api_key() - logger = logging.getLogger(__name__) DD_FLUSH_TO_LOG = "DD_FLUSH_TO_LOG" @@ -226,10 +222,7 @@ def __init__(self, func): # Enable LLM Observability if llmobs_env_var: - LLMObs.enable( - agentless_enabled=True, - api_key=llmobs_api_key, - ) + LLMObs.enable() logger.debug("datadog_lambda_wrapper initialized") except Exception as e: From 33cd5bf9e05bd3e3a35450c42b522ce1280b9add Mon Sep 17 00:00:00 2001 From: Rey Abolofia Date: Thu, 24 Apr 2025 11:30:38 -0700 Subject: [PATCH 19/26] Add new region ap-southeast-7. (#586) --- ci/datasources/regions.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/datasources/regions.yaml b/ci/datasources/regions.yaml index 93816ce9..f74c62b8 100644 --- a/ci/datasources/regions.yaml +++ b/ci/datasources/regions.yaml @@ -12,6 +12,7 @@ regions: - code: "ap-southeast-3" - code: "ap-southeast-4" - code: "ap-southeast-5" + - code: "ap-southeast-7" - code: "ap-northeast-1" - code: "ap-northeast-2" - code: "ap-northeast-3" From 72caf0ae5d29f82649b57fb6075dcf9a4f7a436b Mon Sep 17 00:00:00 2001 From: Nicholas Hulston Date: Tue, 29 Apr 2025 14:10:13 -0400 Subject: [PATCH 20/26] add mx-central-1 region (#587) --- ci/datasources/regions.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/datasources/regions.yaml b/ci/datasources/regions.yaml index f74c62b8..d41bade3 100644 --- a/ci/datasources/regions.yaml +++ b/ci/datasources/regions.yaml @@ -29,4 +29,5 @@ regions: - code: "il-central-1" - code: "me-south-1" - code: "me-central-1" + - code: "mx-central-1" - code: "sa-east-1" From 1f8d3fd96a7dce844a3ecaea4ec2f41be1e73f56 Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Tue, 6 May 2025 11:45:18 -0400 Subject: [PATCH 21/26] feat: Correct FIPS-mode metrics (#588) - Our `dogstatsd` client now supports timestamps for the metrics that it will send. - This unblocks us to always send metrics to the extension, even if they have a timestamp. Confirmed that this actually works now with both bottlecap and the go agent. - Refactored the metrics workflow to have an explicit choice of metrics handlers (Extension, Forwarder, Datadog API, or, for some FIPS usecases, No Handler). - Added a `DD_LAMBDA_FIPS_MODE` flag which allows FIPS-mode logic to be enabled in commercial regions or disabled in govcloud regions. - The new FIPS mode is used for Datadog API Key secret lookup and for metrics handling decisions. ### Breaking Change Since the `DD_LAMBDA_FIPS_MODE` defaults to `true` in govcloud, direct metrics submission there (without an Extension or a Forwarder) will now be disabled. 
--- datadog_lambda/api.py | 22 ++-- datadog_lambda/dogstatsd.py | 27 +++-- datadog_lambda/fips.py | 19 ++++ datadog_lambda/metric.py | 149 +++++++++++++++----------- datadog_lambda/stats_writer.py | 2 +- datadog_lambda/statsd_writer.py | 6 +- datadog_lambda/thread_stats_writer.py | 3 +- tests/test_api.py | 26 ++++- tests/test_dogstatsd.py | 20 ++-- tests/test_metric.py | 123 +++++++++++++-------- 10 files changed, 254 insertions(+), 143 deletions(-) create mode 100644 datadog_lambda/fips.py diff --git a/datadog_lambda/api.py b/datadog_lambda/api.py index fd3e2c17..d1cee4e4 100644 --- a/datadog_lambda/api.py +++ b/datadog_lambda/api.py @@ -1,5 +1,7 @@ -import os import logging +import os + +from datadog_lambda.fips import fips_mode_enabled logger = logging.getLogger(__name__) KMS_ENCRYPTION_CONTEXT_KEY = "LambdaFunctionName" @@ -7,9 +9,10 @@ def decrypt_kms_api_key(kms_client, ciphertext): - from botocore.exceptions import ClientError import base64 + from botocore.exceptions import ClientError + """ Decodes and deciphers the base64-encoded ciphertext given as a parameter using KMS. For this to work properly, the Lambda function must have the appropriate IAM permissions. @@ -63,10 +66,9 @@ def get_api_key() -> str: DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", "")) LAMBDA_REGION = os.environ.get("AWS_REGION", "") - is_gov_region = LAMBDA_REGION.startswith("us-gov-") - if is_gov_region: + if fips_mode_enabled: logger.debug( - "Govcloud region detected. Using FIPs endpoints for secrets management." + "FIPS mode is enabled, using FIPS endpoints for secrets management." ) if DD_API_KEY_SECRET_ARN: @@ -80,7 +82,7 @@ def get_api_key() -> str: return "" endpoint_url = ( f"https://secretsmanager-fips.{secrets_region}.amazonaws.com" - if is_gov_region + if fips_mode_enabled else None ) secrets_manager_client = _boto3_client( @@ -92,7 +94,9 @@ def get_api_key() -> str: elif DD_API_KEY_SSM_NAME: # SSM endpoints: https://docs.aws.amazon.com/general/latest/gr/ssm.html fips_endpoint = ( - f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None + f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" + if fips_mode_enabled + else None ) ssm_client = _boto3_client("ssm", endpoint_url=fips_endpoint) api_key = ssm_client.get_parameter( @@ -101,7 +105,9 @@ def get_api_key() -> str: elif DD_KMS_API_KEY: # KMS endpoints: https://docs.aws.amazon.com/general/latest/gr/kms.html fips_endpoint = ( - f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None + f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" + if fips_mode_enabled + else None ) kms_client = _boto3_client("kms", endpoint_url=fips_endpoint) api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY) diff --git a/datadog_lambda/dogstatsd.py b/datadog_lambda/dogstatsd.py index a627492d..f30a2039 100644 --- a/datadog_lambda/dogstatsd.py +++ b/datadog_lambda/dogstatsd.py @@ -1,11 +1,10 @@ +import errno import logging import os -import socket -import errno import re +import socket from threading import Lock - MIN_SEND_BUFFER_SIZE = 32 * 1024 log = logging.getLogger("datadog_lambda.dogstatsd") @@ -55,14 +54,21 @@ def _get_udp_socket(cls, host, port): return sock - def distribution(self, metric, value, tags=None): + def distribution(self, metric, value, tags=None, timestamp=None): """ - Send a global distribution value, optionally setting tags. + Send a global distribution value, optionally setting tags. 
The optional + timestamp should be an integer representing seconds since the epoch + (January 1, 1970, 00:00:00 UTC). >>> statsd.distribution("uploaded.file.size", 1445) >>> statsd.distribution("album.photo.count", 26, tags=["gender:female"]) + >>> statsd.distribution( + >>> "historic.file.count", + >>> 5, + >>> timestamp=int(datetime(2020, 2, 14, 12, 0, 0).timestamp()), + >>> ) """ - self._report(metric, "d", value, tags) + self._report(metric, "d", value, tags, timestamp) def close_socket(self): """ @@ -84,20 +90,21 @@ def normalize_tags(self, tag_list): for tag in tag_list ] - def _serialize_metric(self, metric, metric_type, value, tags): + def _serialize_metric(self, metric, metric_type, value, tags, timestamp): # Create/format the metric packet - return "%s:%s|%s%s" % ( + return "%s:%s|%s%s%s" % ( metric, value, metric_type, ("|#" + ",".join(self.normalize_tags(tags))) if tags else "", + ("|T" + str(timestamp)) if timestamp is not None else "", ) - def _report(self, metric, metric_type, value, tags): + def _report(self, metric, metric_type, value, tags, timestamp): if value is None: return - payload = self._serialize_metric(metric, metric_type, value, tags) + payload = self._serialize_metric(metric, metric_type, value, tags, timestamp) # Send it self._send_to_server(payload) diff --git a/datadog_lambda/fips.py b/datadog_lambda/fips.py new file mode 100644 index 00000000..8442ddd9 --- /dev/null +++ b/datadog_lambda/fips.py @@ -0,0 +1,19 @@ +import logging +import os + +is_gov_region = os.environ.get("AWS_REGION", "").startswith("us-gov-") + +fips_mode_enabled = ( + os.environ.get( + "DD_LAMBDA_FIPS_MODE", + "true" if is_gov_region else "false", + ).lower() + == "true" +) + +if is_gov_region or fips_mode_enabled: + logger = logging.getLogger(__name__) + logger.debug( + "Python Lambda Layer FIPS mode is %s.", + "enabled" if fips_mode_enabled else "not enabled", + ) diff --git a/datadog_lambda/metric.py b/datadog_lambda/metric.py index f9c67a26..0c18b517 100644 --- a/datadog_lambda/metric.py +++ b/datadog_lambda/metric.py @@ -3,37 +3,66 @@ # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. +import enum +import logging import os import time -import logging -import ujson as json from datetime import datetime, timedelta +import ujson as json + from datadog_lambda.extension import should_use_extension -from datadog_lambda.tags import get_enhanced_metrics_tags, dd_lambda_layer_tag +from datadog_lambda.fips import fips_mode_enabled +from datadog_lambda.tags import dd_lambda_layer_tag, get_enhanced_metrics_tags logger = logging.getLogger(__name__) -lambda_stats = None -extension_thread_stats = None -flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true" +class MetricsHandler(enum.Enum): + EXTENSION = "extension" + FORWARDER = "forwarder" + DATADOG_API = "datadog_api" + NO_METRICS = "no_metrics" + -if should_use_extension: +def _select_metrics_handler(): + if should_use_extension: + return MetricsHandler.EXTENSION + if os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true": + return MetricsHandler.FORWARDER + + if fips_mode_enabled: + logger.debug( + "With FIPS mode enabled, the Datadog API metrics handler is unavailable." 
+ ) + return MetricsHandler.NO_METRICS + + return MetricsHandler.DATADOG_API + + +metrics_handler = _select_metrics_handler() +logger.debug("identified primary metrics handler as %s", metrics_handler) + + +lambda_stats = None +if metrics_handler == MetricsHandler.EXTENSION: from datadog_lambda.statsd_writer import StatsDWriter lambda_stats = StatsDWriter() -else: + +elif metrics_handler == MetricsHandler.DATADOG_API: # Periodical flushing in a background thread is NOT guaranteed to succeed # and leads to data loss. When disabled, metrics are only flushed at the # end of invocation. To make metrics submitted from a long-running Lambda # function available sooner, consider using the Datadog Lambda extension. - from datadog_lambda.thread_stats_writer import ThreadStatsWriter from datadog_lambda.api import init_api + from datadog_lambda.thread_stats_writer import ThreadStatsWriter + flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true" init_api() lambda_stats = ThreadStatsWriter(flush_in_thread) + enhanced_metrics_enabled = ( os.environ.get("DD_ENHANCED_METRICS", "true").lower() == "true" ) @@ -44,16 +73,19 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal Submit a data point to Datadog distribution metrics. https://docs.datadoghq.com/graphing/metrics/distributions/ - When DD_FLUSH_TO_LOG is True, write metric to log, and - wait for the Datadog Log Forwarder Lambda function to submit - the metrics asynchronously. + If the Datadog Lambda Extension is present, metrics are submitted to its + dogstatsd endpoint. + + When DD_FLUSH_TO_LOG is True or force_async is True, write metric to log, + and wait for the Datadog Log Forwarder Lambda function to submit the + metrics asynchronously. Otherwise, the metrics will be submitted to the Datadog API periodically and at the end of the function execution in a background thread. 
-    Note that if the extension is present, it will override the DD_FLUSH_TO_LOG value
-    and always use the layer to send metrics to the extension
+    Note that if the extension is present, it will override the DD_FLUSH_TO_LOG
+    value and always use the layer to send metrics to the extension
     """
     if not metric_name or not isinstance(metric_name, str):
         logger.warning(
@@ -71,56 +103,54 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal
         )
         return

-    flush_to_logs = os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true"
     tags = [] if tags is None else list(tags)
     tags.append(dd_lambda_layer_tag)

-    if should_use_extension and timestamp is not None:
-        # The extension does not support timestamps for distributions so we create a
-        # a thread stats writer to submit metrics with timestamps to the API
-        timestamp_ceiling = int(
-            (datetime.now() - timedelta(hours=4)).timestamp()
-        )  # 4 hours ago
-        if isinstance(timestamp, datetime):
-            timestamp = int(timestamp.timestamp())
-        if timestamp_ceiling > timestamp:
-            logger.warning(
-                "Timestamp %s is older than 4 hours, not submitting metric %s",
-                timestamp,
-                metric_name,
-            )
-            return
-        global extension_thread_stats
-        if extension_thread_stats is None:
-            from datadog_lambda.thread_stats_writer import ThreadStatsWriter
-            from datadog_lambda.api import init_api
-
-            init_api()
-            extension_thread_stats = ThreadStatsWriter(flush_in_thread)
-
-        extension_thread_stats.distribution(
-            metric_name, value, tags=tags, timestamp=timestamp
-        )
-        return
+    if metrics_handler == MetricsHandler.EXTENSION:
+        if timestamp is not None:
+            if isinstance(timestamp, datetime):
+                timestamp = int(timestamp.timestamp())
+
+            timestamp_floor = int((datetime.now() - timedelta(hours=4)).timestamp())
+            if timestamp < timestamp_floor:
+                logger.warning(
+                    "Timestamp %s is older than 4 hours, not submitting metric %s",
+                    timestamp,
+                    metric_name,
+                )
+                return

-    if should_use_extension:
         logger.debug(
             "Sending metric %s value %s to Datadog via extension", metric_name, value
         )
         lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)
+
+    elif force_async or (metrics_handler == MetricsHandler.FORWARDER):
+        write_metric_point_to_stdout(metric_name, value, timestamp=timestamp, tags=tags)
+
+    elif metrics_handler == MetricsHandler.DATADOG_API:
+        lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)
+
+    elif metrics_handler == MetricsHandler.NO_METRICS:
+        logger.debug(
+            "Metric %s cannot be submitted because the metrics handler is disabled",
+            metric_name,
+        )
+
     else:
-        if flush_to_logs or force_async:
-            write_metric_point_to_stdout(
-                metric_name, value, timestamp=timestamp, tags=tags
-            )
-        else:
-            lambda_stats.distribution(
-                metric_name, value, tags=tags, timestamp=timestamp
-            )
+        # This should be quite impossible, but let's at least log a message if
+        # it somehow happens.
+ logger.debug( + "Metric %s cannot be submitted because the metrics handler is not configured: %s", + metric_name, + metrics_handler, + ) -def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]): +def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=None): """Writes the specified metric point to standard output""" + tags = tags or [] + logger.debug( "Sending metric %s value %s to Datadog via log forwarder", metric_name, value ) @@ -138,19 +168,8 @@ def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]): def flush_stats(lambda_context=None): - lambda_stats.flush() - - if extension_thread_stats is not None: - tags = None - if lambda_context is not None: - tags = get_enhanced_metrics_tags(lambda_context) - split_arn = lambda_context.invoked_function_arn.split(":") - if len(split_arn) > 7: - # Get rid of the alias - split_arn.pop() - arn = ":".join(split_arn) - tags.append("function_arn:" + arn) - extension_thread_stats.flush(tags) + if lambda_stats is not None: + lambda_stats.flush() def submit_enhanced_metric(metric_name, lambda_context): diff --git a/datadog_lambda/stats_writer.py b/datadog_lambda/stats_writer.py index d3919c30..563b1ae9 100644 --- a/datadog_lambda/stats_writer.py +++ b/datadog_lambda/stats_writer.py @@ -1,5 +1,5 @@ class StatsWriter: - def distribution(self, metric_name, value, tags=[], timestamp=None): + def distribution(self, metric_name, value, tags=None, timestamp=None): raise NotImplementedError() def flush(self): diff --git a/datadog_lambda/statsd_writer.py b/datadog_lambda/statsd_writer.py index 33843dc6..4aaab8d5 100644 --- a/datadog_lambda/statsd_writer.py +++ b/datadog_lambda/statsd_writer.py @@ -1,5 +1,5 @@ -from datadog_lambda.stats_writer import StatsWriter from datadog_lambda.dogstatsd import statsd +from datadog_lambda.stats_writer import StatsWriter class StatsDWriter(StatsWriter): @@ -7,8 +7,8 @@ class StatsDWriter(StatsWriter): Writes distribution metrics using StatsD protocol """ - def distribution(self, metric_name, value, tags=[], timestamp=None): - statsd.distribution(metric_name, value, tags=tags) + def distribution(self, metric_name, value, tags=None, timestamp=None): + statsd.distribution(metric_name, value, tags=tags, timestamp=timestamp) def flush(self): pass diff --git a/datadog_lambda/thread_stats_writer.py b/datadog_lambda/thread_stats_writer.py index 422a9a0a..f21ee31f 100644 --- a/datadog_lambda/thread_stats_writer.py +++ b/datadog_lambda/thread_stats_writer.py @@ -3,6 +3,7 @@ # Make sure that this package would always be lazy-loaded/outside from the critical path # since underlying packages are quite heavy to load and useless when the extension is present from datadog.threadstats import ThreadStats + from datadog_lambda.stats_writer import StatsWriter logger = logging.getLogger(__name__) @@ -17,7 +18,7 @@ def __init__(self, flush_in_thread): self.thread_stats = ThreadStats(compress_payload=True) self.thread_stats.start(flush_in_thread=flush_in_thread) - def distribution(self, metric_name, value, tags=[], timestamp=None): + def distribution(self, metric_name, value, tags=None, timestamp=None): self.thread_stats.distribution( metric_name, value, tags=tags, timestamp=timestamp ) diff --git a/tests/test_api.py b/tests/test_api.py index c98d91eb..59ee4ee8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,6 +1,6 @@ import os import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch import datadog_lambda.api as api @@ -22,6 
+22,7 @@ def setUp(self): ) self.env_patcher.start() + @patch("datadog_lambda.api.fips_mode_enabled", True) @patch("botocore.session.Session.create_client") def test_secrets_manager_fips_endpoint(self, mock_boto3_client): mock_client = MagicMock() @@ -62,6 +63,28 @@ def test_secrets_manager_different_region(self, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") + @patch("datadog_lambda.api.fips_mode_enabled", True) + @patch("botocore.session.Session.create_client") + def test_secrets_manager_different_region_but_still_fips(self, mock_boto3_client): + mock_client = MagicMock() + mock_client.get_secret_value.return_value = {"SecretString": "test-api-key"} + mock_boto3_client.return_value = mock_client + + os.environ["AWS_REGION"] = "us-east-1" + os.environ[ + "DD_API_KEY_SECRET_ARN" + ] = "arn:aws:secretsmanager:us-west-1:1234567890:secret:key-name-123ABC" + + api_key = api.get_api_key() + + mock_boto3_client.assert_called_with( + "secretsmanager", + endpoint_url="https://secretsmanager-fips.us-west-1.amazonaws.com", + region_name="us-west-1", + ) + self.assertEqual(api_key, "test-api-key") + + @patch("datadog_lambda.api.fips_mode_enabled", True) @patch("botocore.session.Session.create_client") def test_ssm_fips_endpoint(self, mock_boto3_client): mock_client = MagicMock() @@ -80,6 +103,7 @@ def test_ssm_fips_endpoint(self, mock_boto3_client): ) self.assertEqual(api_key, "test-api-key") + @patch("datadog_lambda.api.fips_mode_enabled", True) @patch("botocore.session.Session.create_client") @patch("datadog_lambda.api.decrypt_kms_api_key") def test_kms_fips_endpoint(self, mock_decrypt_kms, mock_boto3_client): diff --git a/tests/test_dogstatsd.py b/tests/test_dogstatsd.py index 149e1a70..ea6afd48 100644 --- a/tests/test_dogstatsd.py +++ b/tests/test_dogstatsd.py @@ -1,5 +1,5 @@ -from collections import deque import unittest +from collections import deque from datadog_lambda.dogstatsd import statsd @@ -36,16 +36,20 @@ def test_init(self): self.assertEqual(statsd.port, 8125) self.assertEqual(statsd.encoding, "utf-8") - def test_distribution_no_tags(self): - statsd.distribution("my.test.metric", 3) + def _checkOnlyOneMetric(self, value): payload = self.recv() metrics = payload.split("\n") self.assertEqual(len(metrics), 1) - self.assertEqual("my.test.metric:3|d", metrics[0]) + self.assertEqual(value, metrics[0]) + + def test_distribution_no_tags(self): + statsd.distribution("my.test.metric", 3) + self._checkOnlyOneMetric("my.test.metric:3|d") def test_distribution_with_tags(self): statsd.distribution("my.test.tags.metric", 3, tags=["taga:valuea,tagb:valueb"]) - payload = self.recv() - metrics = payload.split("\n") - self.assertEqual(len(metrics), 1) - self.assertEqual("my.test.tags.metric:3|d|#taga:valuea_tagb:valueb", metrics[0]) + self._checkOnlyOneMetric("my.test.tags.metric:3|d|#taga:valuea_tagb:valueb") + + def test_distribution_with_timestamp(self): + statsd.distribution("my.test.timestamp.metric", 9, timestamp=123456789) + self._checkOnlyOneMetric("my.test.timestamp.metric:9|d|T123456789") diff --git a/tests/test_metric.py b/tests/test_metric.py index d10a0f0d..a4b0be2c 100644 --- a/tests/test_metric.py +++ b/tests/test_metric.py @@ -1,23 +1,33 @@ import os import unittest - -from unittest.mock import patch, call +from datetime import datetime, timedelta +from unittest.mock import call, patch from botocore.exceptions import ClientError as BotocoreClientError from datadog.api.exceptions import ClientError -from datetime import datetime, timedelta -from datadog_lambda.metric 
import lambda_metric, flush_stats -from datadog_lambda.api import decrypt_kms_api_key, KMS_ENCRYPTION_CONTEXT_KEY -from datadog_lambda.thread_stats_writer import ThreadStatsWriter +from datadog_lambda.api import KMS_ENCRYPTION_CONTEXT_KEY, decrypt_kms_api_key +from datadog_lambda.metric import ( + MetricsHandler, + _select_metrics_handler, + flush_stats, + lambda_metric, +) from datadog_lambda.tags import dd_lambda_layer_tag +from datadog_lambda.thread_stats_writer import ThreadStatsWriter class TestLambdaMetric(unittest.TestCase): def setUp(self): - patcher = patch("datadog_lambda.metric.lambda_stats") - self.mock_metric_lambda_stats = patcher.start() - self.addCleanup(patcher.stop) + lambda_stats_patcher = patch("datadog_lambda.metric.lambda_stats") + self.mock_metric_lambda_stats = lambda_stats_patcher.start() + self.addCleanup(lambda_stats_patcher.stop) + + stdout_metric_patcher = patch( + "datadog_lambda.metric.write_metric_point_to_stdout" + ) + self.mock_write_metric_point_to_stdout = stdout_metric_patcher.start() + self.addCleanup(stdout_metric_patcher.stop) def test_lambda_metric_tagged_with_dd_lambda_layer(self): lambda_metric("test", 1) @@ -35,67 +45,94 @@ def test_lambda_metric_tagged_with_dd_lambda_layer(self): # let's fake that the extension is present, this should override DD_FLUSH_TO_LOG @patch("datadog_lambda.metric.should_use_extension", True) - def test_lambda_metric_flush_to_log_with_extension(self): + def test_select_metrics_handler_extension_despite_flush_to_logs(self): + os.environ["DD_FLUSH_TO_LOG"] = "True" + self.assertEqual(MetricsHandler.EXTENSION, _select_metrics_handler()) + del os.environ["DD_FLUSH_TO_LOG"] + + @patch("datadog_lambda.metric.should_use_extension", False) + def test_select_metrics_handler_forwarder_when_flush_to_logs(self): os.environ["DD_FLUSH_TO_LOG"] = "True" + self.assertEqual(MetricsHandler.FORWARDER, _select_metrics_handler()) + del os.environ["DD_FLUSH_TO_LOG"] + + @patch("datadog_lambda.metric.should_use_extension", False) + def test_select_metrics_handler_dd_api_fallback(self): + os.environ["DD_FLUSH_TO_LOG"] = "False" + self.assertEqual(MetricsHandler.DATADOG_API, _select_metrics_handler()) + del os.environ["DD_FLUSH_TO_LOG"] + + @patch("datadog_lambda.metric.fips_mode_enabled", True) + @patch("datadog_lambda.metric.should_use_extension", False) + def test_select_metrics_handler_has_no_fallback_in_fips_mode(self): + os.environ["DD_FLUSH_TO_LOG"] = "False" + self.assertEqual(MetricsHandler.NO_METRICS, _select_metrics_handler()) + del os.environ["DD_FLUSH_TO_LOG"] + + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) + def test_lambda_metric_goes_to_extension_with_extension_handler(self): lambda_metric("test", 1) self.mock_metric_lambda_stats.distribution.assert_has_calls( [call("test", 1, timestamp=None, tags=[dd_lambda_layer_tag])] ) - del os.environ["DD_FLUSH_TO_LOG"] - @patch("datadog_lambda.metric.should_use_extension", True) - def test_lambda_metric_timestamp_with_extension(self): - patcher = patch("datadog_lambda.metric.extension_thread_stats") - self.mock_metric_extension_thread_stats = patcher.start() - self.addCleanup(patcher.stop) + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.NO_METRICS) + def test_lambda_metric_has_nowhere_to_go_with_no_metrics_handler(self): + lambda_metric("test", 1) + self.mock_metric_lambda_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() + @patch("datadog_lambda.metric.metrics_handler", 
MetricsHandler.EXTENSION) + def test_lambda_metric_timestamp_with_extension(self): delta = timedelta(minutes=1) timestamp = int((datetime.now() - delta).timestamp()) lambda_metric("test_timestamp", 1, timestamp) - self.mock_metric_lambda_stats.distribution.assert_not_called() - self.mock_metric_extension_thread_stats.distribution.assert_called_with( - "test_timestamp", 1, timestamp=timestamp, tags=[dd_lambda_layer_tag] + self.mock_metric_lambda_stats.distribution.assert_has_calls( + [call("test_timestamp", 1, timestamp=timestamp, tags=[dd_lambda_layer_tag])] ) + self.mock_write_metric_point_to_stdout.assert_not_called() - @patch("datadog_lambda.metric.should_use_extension", True) + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) def test_lambda_metric_datetime_with_extension(self): - patcher = patch("datadog_lambda.metric.extension_thread_stats") - self.mock_metric_extension_thread_stats = patcher.start() - self.addCleanup(patcher.stop) - - delta = timedelta(hours=5) + delta = timedelta(minutes=1) timestamp = datetime.now() - delta - lambda_metric("test_timestamp", 1, timestamp) - self.mock_metric_lambda_stats.distribution.assert_not_called() - self.mock_metric_extension_thread_stats.distribution.assert_not_called() + lambda_metric("test_datetime_timestamp", 0, timestamp) + self.mock_metric_lambda_stats.distribution.assert_has_calls( + [ + call( + "test_datetime_timestamp", + 0, + timestamp=int(timestamp.timestamp()), + tags=[dd_lambda_layer_tag], + ) + ] + ) + self.mock_write_metric_point_to_stdout.assert_not_called() - @patch("datadog_lambda.metric.should_use_extension", True) + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) def test_lambda_metric_invalid_timestamp_with_extension(self): - patcher = patch("datadog_lambda.metric.extension_thread_stats") - self.mock_metric_extension_thread_stats = patcher.start() - self.addCleanup(patcher.stop) - delta = timedelta(hours=5) timestamp = int((datetime.now() - delta).timestamp()) lambda_metric("test_timestamp", 1, timestamp) self.mock_metric_lambda_stats.distribution.assert_not_called() - self.mock_metric_extension_thread_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.FORWARDER) def test_lambda_metric_flush_to_log(self): - os.environ["DD_FLUSH_TO_LOG"] = "True" - lambda_metric("test", 1) self.mock_metric_lambda_stats.distribution.assert_not_called() - - del os.environ["DD_FLUSH_TO_LOG"] + self.mock_write_metric_point_to_stdout.assert_has_calls( + [call("test", 1, timestamp=None, tags=[dd_lambda_layer_tag])] + ) @patch("datadog_lambda.metric.logger.warning") def test_lambda_metric_invalid_metric_name_none(self, mock_logger_warning): lambda_metric(None, 1) self.mock_metric_lambda_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() mock_logger_warning.assert_called_once_with( "Ignoring metric submission. Invalid metric name: %s", None ) @@ -104,6 +141,7 @@ def test_lambda_metric_invalid_metric_name_none(self, mock_logger_warning): def test_lambda_metric_invalid_metric_name_not_string(self, mock_logger_warning): lambda_metric(123, 1) self.mock_metric_lambda_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() mock_logger_warning.assert_called_once_with( "Ignoring metric submission. 
Invalid metric name: %s", 123 ) @@ -112,6 +150,7 @@ def test_lambda_metric_invalid_metric_name_not_string(self, mock_logger_warning) def test_lambda_metric_non_numeric_value(self, mock_logger_warning): lambda_metric("test.non_numeric", "oops") self.mock_metric_lambda_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() mock_logger_warning.assert_called_once_with( "Ignoring metric submission for metric '%s' because the value is not numeric: %r", "test.non_numeric", @@ -127,10 +166,6 @@ def setUp(self): self.mock_threadstats_flush_distributions = patcher.start() self.addCleanup(patcher.stop) - patcher = patch("datadog_lambda.metric.extension_thread_stats") - self.mock_extension_thread_stats = patcher.start() - self.addCleanup(patcher.stop) - def test_retry_on_remote_disconnected(self): # Raise the RemoteDisconnected error lambda_stats = ThreadStatsWriter(True) @@ -209,10 +244,6 @@ def test_flush_temp_constant_tags(self): lambda_stats.thread_stats.constant_tags, original_constant_tags ) - def test_flush_stats_without_context(self): - flush_stats(lambda_context=None) - self.mock_extension_thread_stats.flush.assert_called_with(None) - MOCK_FUNCTION_NAME = "myFunction" From 8a01794b02244efab5814f52e442aacf71682aac Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Wed, 7 May 2025 16:45:13 -0400 Subject: [PATCH 22/26] 6.108.0 Release Candidate (#589) --- datadog_lambda/version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog_lambda/version.py b/datadog_lambda/version.py index 702691d8..bcd37def 100644 --- a/datadog_lambda/version.py +++ b/datadog_lambda/version.py @@ -1 +1 @@ -__version__ = "6.107.0" +__version__ = "6.108.0" diff --git a/pyproject.toml b/pyproject.toml index 165a8cbe..8f16b438 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datadog_lambda" -version = "6.107.0" +version = "6.108.0" description = "The Datadog AWS Lambda Library" authors = ["Datadog, Inc. "] license = "Apache-2.0" From 8ca58b40dcfa8ad4c2169a27ddca27b19c7fb348 Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Thu, 8 May 2025 11:19:03 -0400 Subject: [PATCH 23/26] fix: timestamps we send to the extension should be integers (#590) Also added some defense in depth for our lower level statsd call. 
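For readers tracking the metrics refactor across the last few patches: the `_select_metrics_handler` tests earlier in this series pin down a small decision tree for where a metric point goes. The sketch below is a reconstruction from those tests, not the library code verbatim; in `datadog_lambda/metric.py` the three inputs are module-level flags derived from the environment, and they are passed explicitly here only for illustration.

    from enum import Enum, auto

    class MetricsHandler(Enum):
        EXTENSION = auto()
        FORWARDER = auto()
        DATADOG_API = auto()
        NO_METRICS = auto()

    def select_metrics_handler(should_use_extension, flush_to_log, fips_mode_enabled):
        # The extension always wins, even when DD_FLUSH_TO_LOG is set.
        if should_use_extension:
            return MetricsHandler.EXTENSION
        # Without the extension, DD_FLUSH_TO_LOG routes points to stdout
        # for the log forwarder to pick up.
        if flush_to_log:
            return MetricsHandler.FORWARDER
        # FIPS mode has no direct-to-API fallback at all.
        if fips_mode_enabled:
            return MetricsHandler.NO_METRICS
        return MetricsHandler.DATADOG_API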
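The timestamp hardening described above reduces to one normalization step: accept `datetime` objects and anything `int()` can coerce, drop everything else, and drop points older than the extension's backfill window. A condensed sketch of that flow, assuming the `timestamp is None` case was already handled by the caller; the helper name is mine, and in the library the logic is inlined in `lambda_metric()`, as the diff below shows:

    from datetime import datetime, timedelta

    def normalize_metric_timestamp(timestamp):
        """Return int epoch seconds, or None when the point must be dropped."""
        if isinstance(timestamp, datetime):
            timestamp = int(timestamp.timestamp())
        else:
            try:
                # ints pass through and floats truncate; junk such as an
                # isoformat() string raises, and the metric is dropped
                timestamp = int(timestamp)
            except Exception:
                return None
        # Points older than roughly four hours fall outside the window
        # the extension accepts.
        floor = int((datetime.now() - timedelta(hours=4)).timestamp())
        if timestamp < floor:
            return None
        return timestamp

Under this flow, `(datetime.now() - timedelta(minutes=1)).timestamp()` (a float) normalizes to an int, while `datetime.now().isoformat()` (a string) is rejected, which is exactly what the two new tests below assert.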
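At the wire level, the defense in depth is a single `int()` in the datagram serializer, so a float timestamp cannot leak a fractional part into the `|T` field. A minimal stand-in for the serializer, hedged accordingly: the real `_serialize_metric` also takes a metric type and runs tags through `normalize_tags`, both omitted here.

    def serialize_distribution(metric, value, tags=None, timestamp=None):
        # Mirrors the datagrams asserted in tests/test_dogstatsd.py,
        # e.g. "my.test.timestamp.metric:9|d|T123456789".
        return "%s:%s|d%s%s" % (
            metric,
            value,
            ("|#" + ",".join(tags)) if tags else "",
            ("|T" + str(int(timestamp))) if timestamp is not None else "",
        )

    assert (
        serialize_distribution("my.test.timestamp.metric", 9, timestamp=123456789.123)
        == "my.test.timestamp.metric:9|d|T123456789"
    )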
--- datadog_lambda/dogstatsd.py | 2 +- datadog_lambda/metric.py | 12 ++++++++++++ tests/test_dogstatsd.py | 4 ++++ tests/test_metric.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/datadog_lambda/dogstatsd.py b/datadog_lambda/dogstatsd.py index f30a2039..a08e2592 100644 --- a/datadog_lambda/dogstatsd.py +++ b/datadog_lambda/dogstatsd.py @@ -97,7 +97,7 @@ def _serialize_metric(self, metric, metric_type, value, tags, timestamp): value, metric_type, ("|#" + ",".join(self.normalize_tags(tags))) if tags else "", - ("|T" + str(timestamp)) if timestamp is not None else "", + ("|T" + str(int(timestamp))) if timestamp is not None else "", ) def _report(self, metric, metric_type, value, tags, timestamp): diff --git a/datadog_lambda/metric.py b/datadog_lambda/metric.py index 0c18b517..c9b978d6 100644 --- a/datadog_lambda/metric.py +++ b/datadog_lambda/metric.py @@ -111,6 +111,18 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal if isinstance(timestamp, datetime): timestamp = int(timestamp.timestamp()) + else: + try: + timestamp = int(timestamp) + except Exception: + logger.debug( + "Ignoring metric submission for metric '%s' because the timestamp cannot " + "be turned into an integer: %r", + metric_name, + timestamp, + ) + return + timestamp_floor = int((datetime.now() - timedelta(hours=4)).timestamp()) if timestamp < timestamp_floor: logger.warning( diff --git a/tests/test_dogstatsd.py b/tests/test_dogstatsd.py index ea6afd48..6fe79372 100644 --- a/tests/test_dogstatsd.py +++ b/tests/test_dogstatsd.py @@ -53,3 +53,7 @@ def test_distribution_with_tags(self): def test_distribution_with_timestamp(self): statsd.distribution("my.test.timestamp.metric", 9, timestamp=123456789) self._checkOnlyOneMetric("my.test.timestamp.metric:9|d|T123456789") + + def test_distribution_with_float_timestamp(self): + statsd.distribution("my.test.timestamp.metric", 9, timestamp=123456789.123) + self._checkOnlyOneMetric("my.test.timestamp.metric:9|d|T123456789") diff --git a/tests/test_metric.py b/tests/test_metric.py index a4b0be2c..e7dab2c3 100644 --- a/tests/test_metric.py +++ b/tests/test_metric.py @@ -111,6 +111,34 @@ def test_lambda_metric_datetime_with_extension(self): ) self.mock_write_metric_point_to_stdout.assert_not_called() + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) + def test_lambda_metric_float_with_extension(self): + delta = timedelta(minutes=1) + timestamp_float = (datetime.now() - delta).timestamp() + timestamp_int = int(timestamp_float) + + lambda_metric("test_timestamp", 1, timestamp_float) + self.mock_metric_lambda_stats.distribution.assert_has_calls( + [ + call( + "test_timestamp", + 1, + timestamp=timestamp_int, + tags=[dd_lambda_layer_tag], + ) + ] + ) + self.mock_write_metric_point_to_stdout.assert_not_called() + + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) + def test_lambda_metric_timestamp_junk_with_extension(self): + delta = timedelta(minutes=1) + timestamp = (datetime.now() - delta).isoformat() + + lambda_metric("test_timestamp", 1, timestamp) + self.mock_metric_lambda_stats.distribution.assert_not_called() + self.mock_write_metric_point_to_stdout.assert_not_called() + @patch("datadog_lambda.metric.metrics_handler", MetricsHandler.EXTENSION) def test_lambda_metric_invalid_timestamp_with_extension(self): delta = timedelta(hours=5) From 1226b2de560afc6a8fe3e631ad84960b5dd03eef Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Fri, 9 May 2025 
12:46:04 -0400 Subject: [PATCH 24/26] v6.109.0 (#591) --- datadog_lambda/version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog_lambda/version.py b/datadog_lambda/version.py index bcd37def..c3aaa6b7 100644 --- a/datadog_lambda/version.py +++ b/datadog_lambda/version.py @@ -1 +1 @@ -__version__ = "6.108.0" +__version__ = "6.109.0" diff --git a/pyproject.toml b/pyproject.toml index 8f16b438..cccef63e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datadog_lambda" -version = "6.108.0" +version = "6.109.0" description = "The Datadog AWS Lambda Library" authors = ["Datadog, Inc. "] license = "Apache-2.0" From 16774731b8050e3355d24ffe6d917c656cf0c09f Mon Sep 17 00:00:00 2001 From: Nicholas Hulston Date: Fri, 9 May 2025 13:21:33 -0400 Subject: [PATCH 25/26] (feat): Enable Exception Replay in Lambda (#592) * import exception replay from tracer and enable if `DD_EXCEPTION_REPLAY_ENABLED=true` * lint --- datadog_lambda/wrapper.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py index e81b1baa..e5460118 100644 --- a/datadog_lambda/wrapper.py +++ b/datadog_lambda/wrapper.py @@ -53,6 +53,12 @@ if llmobs_env_var: from ddtrace.llmobs import LLMObs +exception_replay_env_var = os.environ.get( + "DD_EXCEPTION_REPLAY_ENABLED", "false" +).lower() in ("true", "1") +if exception_replay_env_var: + from ddtrace.debugging._exception.replay import SpanExceptionHandler + logger = logging.getLogger(__name__) DD_FLUSH_TO_LOG = "DD_FLUSH_TO_LOG" @@ -224,6 +230,11 @@ def __init__(self, func): if llmobs_env_var: LLMObs.enable() + # Enable Exception Replay + if exception_replay_env_var: + logger.debug("Enabling exception replay") + SpanExceptionHandler.enable() + logger.debug("datadog_lambda_wrapper initialized") except Exception as e: logger.error(format_err_with_traceback(e)) From 9b694f7e25c58e9cbffb97392145c6962747fa87 Mon Sep 17 00:00:00 2001 From: Joey Zhao <5253430+joeyzhao2018@users.noreply.github.com> Date: Tue, 13 May 2025 12:47:49 -0400 Subject: [PATCH 26/26] fix: safely getting all the values for trigger tags (#593) * solution2: safely getting all the values * lint * add comment back --- datadog_lambda/trigger.py | 45 +++++++++++++++++++++------ tests/test_trigger.py | 65 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 8090e36e..52978d4b 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -114,10 +114,14 @@ def parse_event_source(event: dict) -> _EventSource: event_source = None - request_context = event.get("requestContext") + # Get requestContext safely and ensure it's a dictionary + request_context = event.get("requestContext", {}) + if not isinstance(request_context, dict): + request_context = {} + if request_context and request_context.get("stage"): if "domainName" in request_context and detect_lambda_function_url_domain( - request_context.get("domainName") + request_context.get("domainName", "") ): return _EventSource(EventTypes.LAMBDA_FUNCTION_URL) event_source = _EventSource(EventTypes.API_GATEWAY) @@ -171,6 +175,8 @@ def parse_event_source(event: dict) -> _EventSource: def detect_lambda_function_url_domain(domain: str) -> bool: # e.g. 
"etsn5fibjr.lambda-url.eu-south-1.amazonaws.com" + if not isinstance(domain, str): + return False domain_parts = domain.split(".") if len(domain_parts) < 2: return False @@ -283,17 +289,28 @@ def extract_http_tags(event): Extracts HTTP facet tags from the triggering event """ http_tags = {} - request_context = event.get("requestContext") + + # Safely get request_context and ensure it's a dictionary + request_context = event.get("requestContext", {}) + if not isinstance(request_context, dict): + request_context = {} + path = event.get("path") method = event.get("httpMethod") + if request_context and request_context.get("stage"): - if request_context.get("domainName"): - http_tags["http.url"] = request_context.get("domainName") + domain_name = request_context.get("domainName") + if domain_name: + http_tags["http.url"] = domain_name path = request_context.get("path") method = request_context.get("httpMethod") + # Version 2.0 HTTP API Gateway - apigateway_v2_http = request_context.get("http") + apigateway_v2_http = request_context.get("http", {}) + if not isinstance(apigateway_v2_http, dict): + apigateway_v2_http = {} + if event.get("version") == "2.0" and apigateway_v2_http: path = apigateway_v2_http.get("path") method = apigateway_v2_http.get("method") @@ -303,15 +320,23 @@ def extract_http_tags(event): if method: http_tags["http.method"] = method - headers = event.get("headers") + # Safely get headers + headers = event.get("headers", {}) + if not isinstance(headers, dict): + headers = {} + if headers and headers.get("Referer"): http_tags["http.referer"] = headers.get("Referer") # Try to get `routeKey` from API GW v2; otherwise try to get `resource` from API GW v1 route = event.get("routeKey") or event.get("resource") - if route: - # "GET /my/endpoint" = > "/my/endpoint" - http_tags["http.route"] = route.split(" ")[-1] + if route and isinstance(route, str): + try: + # "GET /my/endpoint" = > "/my/endpoint" + http_tags["http.route"] = route.split(" ")[-1] + except Exception: + # If splitting fails, use the route as is + http_tags["http.route"] = route return http_tags diff --git a/tests/test_trigger.py b/tests/test_trigger.py index 9cb088f1..b4da7ff0 100644 --- a/tests/test_trigger.py +++ b/tests/test_trigger.py @@ -256,6 +256,30 @@ def test_event_source_unsupported(self): self.assertEqual(event_source.to_string(), "unknown") self.assertEqual(event_source_arn, None) + def test_event_source_with_non_dict_request_context(self): + # Test with requestContext as a string instead of a dict + event = {"requestContext": "not_a_dict"} + event_source = parse_event_source(event) + # Should still return a valid event source (unknown in this case) + self.assertEqual(event_source.to_string(), "unknown") + + def test_event_source_with_invalid_domain_name(self): + # Test with domainName that isn't a string + event = {"requestContext": {"stage": "prod", "domainName": 12345}} + event_source = parse_event_source(event) + # Should detect as API Gateway since stage is present + self.assertEqual(event_source.to_string(), "api-gateway") + + def test_detect_lambda_function_url_domain_with_invalid_input(self): + from datadog_lambda.trigger import detect_lambda_function_url_domain + + # Test with non-string input + self.assertFalse(detect_lambda_function_url_domain(None)) + self.assertFalse(detect_lambda_function_url_domain(12345)) + self.assertFalse(detect_lambda_function_url_domain({"not": "a-string"})) + # Test with string that would normally cause an exception when split + 
self.assertFalse(detect_lambda_function_url_domain("")) + class GetTriggerTags(unittest.TestCase): def test_extract_trigger_tags_api_gateway(self): @@ -530,6 +554,47 @@ def test_extract_trigger_tags_list_type_event(self): tags = extract_trigger_tags(event, ctx) self.assertEqual(tags, {}) + def test_extract_http_tags_with_invalid_request_context(self): + from datadog_lambda.trigger import extract_http_tags + + # Test with requestContext as a string instead of a dict + event = {"requestContext": "not_a_dict", "path": "/test", "httpMethod": "GET"} + http_tags = extract_http_tags(event) + # Should still extract valid tags from the event + self.assertEqual( + http_tags, {"http.url_details.path": "/test", "http.method": "GET"} + ) + + def test_extract_http_tags_with_invalid_apigateway_http(self): + from datadog_lambda.trigger import extract_http_tags + + # Test with http in requestContext that's not a dict + event = { + "requestContext": {"stage": "prod", "http": "not_a_dict"}, + "version": "2.0", + } + http_tags = extract_http_tags(event) + # Should not raise an exception + self.assertEqual(http_tags, {}) + + def test_extract_http_tags_with_invalid_headers(self): + from datadog_lambda.trigger import extract_http_tags + + # Test with headers that's not a dict + event = {"headers": "not_a_dict"} + http_tags = extract_http_tags(event) + # Should not raise an exception + self.assertEqual(http_tags, {}) + + def test_extract_http_tags_with_invalid_route(self): + from datadog_lambda.trigger import extract_http_tags + + # Test with routeKey that would cause a split error + event = {"routeKey": 12345} # Not a string + http_tags = extract_http_tags(event) + # Should not raise an exception + self.assertEqual(http_tags, {}) + class ExtractHTTPStatusCodeTag(unittest.TestCase): def test_extract_http_status_code_tag_from_response_dict(self):