Thanks to visit codestin.com
Credit goes to github.com

Skip to content

feat: Correct FIPS-mode metrics #588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

22 changes: 14 additions & 8 deletions datadog_lambda/api.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import os
import logging
import os

from datadog_lambda.fips import fips_mode_enabled

logger = logging.getLogger(__name__)
KMS_ENCRYPTION_CONTEXT_KEY = "LambdaFunctionName"
api_key = None


def decrypt_kms_api_key(kms_client, ciphertext):
from botocore.exceptions import ClientError
import base64

from botocore.exceptions import ClientError

"""
Decodes and deciphers the base64-encoded ciphertext given as a parameter using KMS.
For this to work properly, the Lambda function must have the appropriate IAM permissions.
Expand Down Expand Up @@ -63,10 +66,9 @@ def get_api_key() -> str:
DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", ""))

LAMBDA_REGION = os.environ.get("AWS_REGION", "")
is_gov_region = LAMBDA_REGION.startswith("us-gov-")
if is_gov_region:
if fips_mode_enabled:
logger.debug(
"Govcloud region detected. Using FIPs endpoints for secrets management."
"FIPS mode is enabled, using FIPS endpoints for secrets management."
)

if DD_API_KEY_SECRET_ARN:
Expand All @@ -80,7 +82,7 @@ def get_api_key() -> str:
return ""
endpoint_url = (
f"https://secretsmanager-fips.{secrets_region}.amazonaws.com"
if is_gov_region
if fips_mode_enabled
else None
)
secrets_manager_client = _boto3_client(
Expand All @@ -92,7 +94,9 @@ def get_api_key() -> str:
elif DD_API_KEY_SSM_NAME:
# SSM endpoints: https://docs.aws.amazon.com/general/latest/gr/ssm.html
fips_endpoint = (
f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None
f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com"
if fips_mode_enabled
else None
)
ssm_client = _boto3_client("ssm", endpoint_url=fips_endpoint)
api_key = ssm_client.get_parameter(
Expand All @@ -101,7 +105,9 @@ def get_api_key() -> str:
elif DD_KMS_API_KEY:
# KMS endpoints: https://docs.aws.amazon.com/general/latest/gr/kms.html
fips_endpoint = (
f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None
f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com"
if fips_mode_enabled
else None
)
kms_client = _boto3_client("kms", endpoint_url=fips_endpoint)
api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY)
Expand Down
27 changes: 17 additions & 10 deletions datadog_lambda/dogstatsd.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import errno
import logging
import os
import socket
import errno
import re
import socket
from threading import Lock


MIN_SEND_BUFFER_SIZE = 32 * 1024
log = logging.getLogger("datadog_lambda.dogstatsd")

Expand Down Expand Up @@ -55,14 +54,21 @@ def _get_udp_socket(cls, host, port):

return sock

def distribution(self, metric, value, tags=None):
def distribution(self, metric, value, tags=None, timestamp=None):
"""
Send a global distribution value, optionally setting tags.
Send a global distribution value, optionally setting tags. The optional
timestamp should be an integer representing seconds since the epoch
(January 1, 1970, 00:00:00 UTC).

>>> statsd.distribution("uploaded.file.size", 1445)
>>> statsd.distribution("album.photo.count", 26, tags=["gender:female"])
>>> statsd.distribution(
>>> "historic.file.count",
>>> 5,
>>> timestamp=int(datetime(2020, 2, 14, 12, 0, 0).timestamp()),
>>> )
"""
self._report(metric, "d", value, tags)
self._report(metric, "d", value, tags, timestamp)

def close_socket(self):
"""
Expand All @@ -84,20 +90,21 @@ def normalize_tags(self, tag_list):
for tag in tag_list
]

def _serialize_metric(self, metric, metric_type, value, tags):
def _serialize_metric(self, metric, metric_type, value, tags, timestamp):
# Create/format the metric packet
return "%s:%s|%s%s" % (
return "%s:%s|%s%s%s" % (
metric,
value,
metric_type,
("|#" + ",".join(self.normalize_tags(tags))) if tags else "",
("|T" + str(timestamp)) if timestamp is not None else "",
)

def _report(self, metric, metric_type, value, tags):
def _report(self, metric, metric_type, value, tags, timestamp):
if value is None:
return

payload = self._serialize_metric(metric, metric_type, value, tags)
payload = self._serialize_metric(metric, metric_type, value, tags, timestamp)

# Send it
self._send_to_server(payload)
Expand Down
19 changes: 19 additions & 0 deletions datadog_lambda/fips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import logging
import os

# True when this Lambda runs in an AWS GovCloud (US) region.
is_gov_region = os.environ.get("AWS_REGION", "").startswith("us-gov-")

# DD_LAMBDA_FIPS_MODE overrides the default; FIPS defaults to on in
# GovCloud regions and off everywhere else.
_fips_default = "true" if is_gov_region else "false"
fips_mode_enabled = (
    os.environ.get("DD_LAMBDA_FIPS_MODE", _fips_default).lower() == "true"
)

if is_gov_region or fips_mode_enabled:
    logger = logging.getLogger(__name__)
    logger.debug(
        "Python Lambda Layer FIPS mode is %s.",
        "enabled" if fips_mode_enabled else "not enabled",
    )
149 changes: 84 additions & 65 deletions datadog_lambda/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,66 @@
# This product includes software developed at Datadog (https://www.datadoghq.com/).
# Copyright 2019 Datadog, Inc.

import enum
import logging
import os
import time
import logging
import ujson as json
from datetime import datetime, timedelta

import ujson as json

from datadog_lambda.extension import should_use_extension
from datadog_lambda.tags import get_enhanced_metrics_tags, dd_lambda_layer_tag
from datadog_lambda.fips import fips_mode_enabled
from datadog_lambda.tags import dd_lambda_layer_tag, get_enhanced_metrics_tags

logger = logging.getLogger(__name__)

lambda_stats = None
extension_thread_stats = None

flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true"
class MetricsHandler(enum.Enum):
    """The mechanism this layer uses to submit custom metrics."""

    EXTENSION = "extension"  # send to the Datadog Lambda Extension via dogstatsd
    FORWARDER = "forwarder"  # write to stdout for the Datadog Log Forwarder
    DATADOG_API = "datadog_api"  # flush to the Datadog API in a background thread
    NO_METRICS = "no_metrics"  # metrics submission disabled (FIPS mode, no extension)


if should_use_extension:
def _select_metrics_handler():
    """Pick the metrics submission mechanism for this runtime.

    Precedence: the Datadog Lambda Extension, then the log forwarder
    (DD_FLUSH_TO_LOG), then the Datadog API — unless FIPS mode is
    enabled, in which case the API handler is unavailable and metrics
    are disabled entirely.
    """
    if should_use_extension:
        return MetricsHandler.EXTENSION

    if os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true":
        return MetricsHandler.FORWARDER

    if not fips_mode_enabled:
        return MetricsHandler.DATADOG_API

    logger.debug(
        "With FIPS mode enabled, the Datadog API metrics handler is unavailable."
    )
    return MetricsHandler.NO_METRICS


metrics_handler = _select_metrics_handler()
logger.debug("identified primary metrics handler as %s", metrics_handler)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be nice to fire off a metric here, similar to the way we do for dynamodb stream settings. but since the most interesting value, NO_METRICS would actually be unavailable, i chose not to do this. let me know if you think we should still send the metric anyway, despite the NO_METRICS blind spot.



lambda_stats = None
if metrics_handler == MetricsHandler.EXTENSION:
from datadog_lambda.statsd_writer import StatsDWriter

lambda_stats = StatsDWriter()
else:

elif metrics_handler == MetricsHandler.DATADOG_API:
# Periodical flushing in a background thread is NOT guaranteed to succeed
# and leads to data loss. When disabled, metrics are only flushed at the
# end of invocation. To make metrics submitted from a long-running Lambda
# function available sooner, consider using the Datadog Lambda extension.
from datadog_lambda.thread_stats_writer import ThreadStatsWriter
from datadog_lambda.api import init_api
from datadog_lambda.thread_stats_writer import ThreadStatsWriter

flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true"
init_api()
lambda_stats = ThreadStatsWriter(flush_in_thread)


enhanced_metrics_enabled = (
os.environ.get("DD_ENHANCED_METRICS", "true").lower() == "true"
)
Expand All @@ -44,16 +73,19 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal
Submit a data point to Datadog distribution metrics.
https://docs.datadoghq.com/graphing/metrics/distributions/

When DD_FLUSH_TO_LOG is True, write metric to log, and
wait for the Datadog Log Forwarder Lambda function to submit
the metrics asynchronously.
If the Datadog Lambda Extension is present, metrics are submitted to its
dogstatsd endpoint.

When DD_FLUSH_TO_LOG is True or force_async is True, write metric to log,
and wait for the Datadog Log Forwarder Lambda function to submit the
metrics asynchronously.

Otherwise, the metrics will be submitted to the Datadog API
periodically and at the end of the function execution in a
background thread.

Note that if the extension is present, it will override the DD_FLUSH_TO_LOG value
and always use the layer to send metrics to the extension
Note that if the extension is present, it will override the DD_FLUSH_TO_LOG
value and always use the layer to send metrics to the extension
"""
if not metric_name or not isinstance(metric_name, str):
logger.warning(
Expand All @@ -71,56 +103,54 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal
)
return

flush_to_logs = os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true"
tags = [] if tags is None else list(tags)
tags.append(dd_lambda_layer_tag)

if should_use_extension and timestamp is not None:
# The extension does not support timestamps for distributions so we create a
# a thread stats writer to submit metrics with timestamps to the API
timestamp_ceiling = int(
(datetime.now() - timedelta(hours=4)).timestamp()
) # 4 hours ago
if isinstance(timestamp, datetime):
timestamp = int(timestamp.timestamp())
if timestamp_ceiling > timestamp:
logger.warning(
"Timestamp %s is older than 4 hours, not submitting metric %s",
timestamp,
metric_name,
)
return
global extension_thread_stats
if extension_thread_stats is None:
from datadog_lambda.thread_stats_writer import ThreadStatsWriter
from datadog_lambda.api import init_api

init_api()
extension_thread_stats = ThreadStatsWriter(flush_in_thread)

extension_thread_stats.distribution(
metric_name, value, tags=tags, timestamp=timestamp
)
return
if metrics_handler == MetricsHandler.EXTENSION:
if timestamp is not None:
if isinstance(timestamp, datetime):
timestamp = int(timestamp.timestamp())

timestamp_floor = int((datetime.now() - timedelta(hours=4)).timestamp())
if timestamp < timestamp_floor:
logger.warning(
"Timestamp %s is older than 4 hours, not submitting metric %s",
timestamp,
metric_name,
)
return

if should_use_extension:
logger.debug(
"Sending metric %s value %s to Datadog via extension", metric_name, value
)
lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)

elif force_async or (metrics_handler == MetricsHandler.FORWARDER):
write_metric_point_to_stdout(metric_name, value, timestamp=timestamp, tags=tags)

elif metrics_handler == MetricsHandler.DATADOG_API:
lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)

elif metrics_handler == MetricsHandler.NO_METRICS:
logger.debug(
"Metric %s cannot be submitted because the metrics handler is disabled",
metric_name,
),

else:
if flush_to_logs or force_async:
write_metric_point_to_stdout(
metric_name, value, timestamp=timestamp, tags=tags
)
else:
lambda_stats.distribution(
metric_name, value, tags=tags, timestamp=timestamp
)
# This should be quite impossible, but let's at least log a message if
# it somehow happens.
logger.debug(
"Metric %s cannot be submitted because the metrics handler is not configured: %s",
metric_name,
metrics_handler,
)


def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]):
def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=None):
"""Writes the specified metric point to standard output"""
tags = tags or []

logger.debug(
"Sending metric %s value %s to Datadog via log forwarder", metric_name, value
)
Expand All @@ -138,19 +168,8 @@ def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]):


def flush_stats(lambda_context=None):
lambda_stats.flush()

if extension_thread_stats is not None:
tags = None
if lambda_context is not None:
tags = get_enhanced_metrics_tags(lambda_context)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we using the get_enhanced_metrics_tags again? Or else this would be breaking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we still use it as part of the submit_enhanced_metric call. but the extension_thread_stats was never None with the changes i put in and thus this code was not being called anymore anyway. this get function have important side effects we need to bring back somewhere?

split_arn = lambda_context.invoked_function_arn.split(":")
if len(split_arn) > 7:
# Get rid of the alias
split_arn.pop()
arn = ":".join(split_arn)
tags.append("function_arn:" + arn)
extension_thread_stats.flush(tags)
if lambda_stats is not None:
lambda_stats.flush()


def submit_enhanced_metric(metric_name, lambda_context):
Expand Down
2 changes: 1 addition & 1 deletion datadog_lambda/stats_writer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class StatsWriter:
def distribution(self, metric_name, value, tags=[], timestamp=None):
def distribution(self, metric_name, value, tags=None, timestamp=None):
    """Abstract hook: concrete writers submit one distribution point here."""
    raise NotImplementedError()

def flush(self):
Expand Down
6 changes: 3 additions & 3 deletions datadog_lambda/statsd_writer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from datadog_lambda.stats_writer import StatsWriter
from datadog_lambda.dogstatsd import statsd
from datadog_lambda.stats_writer import StatsWriter


class StatsDWriter(StatsWriter):
"""
Writes distribution metrics using StatsD protocol
"""

def distribution(self, metric_name, value, tags=[], timestamp=None):
statsd.distribution(metric_name, value, tags=tags)
def distribution(self, metric_name, value, tags=None, timestamp=None):
    """Submit one distribution point through the shared statsd client.

    tags and timestamp are forwarded as-is; timestamp, when given, is
    presumably seconds since the epoch — matches the dogstatsd client's
    documented contract.
    """
    statsd.distribution(
        metric_name,
        value,
        tags=tags,
        timestamp=timestamp,
    )

def flush(self):
pass
Expand Down
Loading
Loading