diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 0000000..d4a716b
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,36 @@
+name: Claude PR Assistant
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude-code-action:  # runs only when the triggering comment, review or issue mentions "@claude"
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write  # NOTE(review): presumably required for the action's OIDC token exchange - confirm
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1  # shallow clone: fetch only the most recent commit
+
+      - name: Run Claude PR Action
+        uses: anthropics/claude-code-action@beta
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          timeout_minutes: "60"
diff --git a/.github/workflows/tfsec.yaml b/.github/workflows/tfsec.yaml
deleted file mode 100644
index 2f75a3e..0000000
--- a/.github/workflows/tfsec.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
----
-name: tfsec
-on:
- push:
- branches:
- - main
- - master
- - prod
- - develop
-
-jobs:
- tfsec:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v2
- - name: Install prerequisites
- run: ./bin/install-ubuntu.sh
- - name: Terraform init
- run: terraform init --backend=false
- - name: tfsec
- uses: reviewdog/action-tfsec@master
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
- reporter: github-check
- filter_mode: nofilter
- level: error
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c2b3f6d..d2bcbd4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,7 +10,7 @@ repos:
- id: terraform_validate
exclude: common
args:
- - --hook-config=--retry-once-with-cleanup=true
+ - --hook-config=--retry-once-with-cleanup=true
- id: terraform_tflint
alias: terraform_tflint_nocreds
name: terraform_tflint_nocreds
diff --git a/.tflint.hcl b/.tflint.hcl
index 5379e3b..8224c09 100644
--- a/.tflint.hcl
+++ b/.tflint.hcl
@@ -1,5 +1,5 @@
config {
- module = true
+ call_module_type = "all"
}
plugin "aws" {
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..6b977df
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,321 @@
+# Infrastructure Standards Enforcement Guide
+
+This document consolidates AWS Tagging Standards and Infrastructure as Code (IaC) Standards for automated enforcement by AI agents and CI/CD systems.
+
+## AWS Tagging Standards Enforcement
+
+### Required Tags (ENFORCE - BLOCK ON FAILURE)
+
+These tags are mandatory for all resources and must be validated:
+
+| Tag | Validation Pattern | Error Message | Applies To |
+|-----|-------------------|---------------|------------|
+| `env` | `^[A-Za-z0-9\-_]+$` | "env tag is required. Valid formats: dev, prod, staging, ops" | ALL resources |
+| `service` | `^[A-Za-z0-9\-_]+$` | "service tag is required. Format: alphanumeric with hyphens/underscores only" | ALL resources |
+
+### Required When Code Managed (ENFORCE - CONDITIONAL)
+
+These tags are required when resources are managed by IaC:
+
+| Tag | Validation Pattern | Error Message | Condition |
+|-----|-------------------|---------------|-----------|
+| `code_managed_by` | `^(terraform\|cloudformation\|serverless\|ansible\|cdk)$` | "code_managed_by must be one of: terraform, cloudformation, serverless, ansible, cdk" | When resource is IaC managed |
+| `code_managed_at` | `^[a-zA-Z0-9\-_/]+$` | "code_managed_at must specify repository location (e.g., org/aws-infra)" | When resource is IaC managed |
+
+### Required for Ansible-Managed EC2 (ENFORCE - CONDITIONAL)
+
+| Tag | Validation Pattern | Error Message | Condition |
+|-----|-------------------|---------------|-----------|
+| `profile` | `^[A-Za-z0-9\-_]+$` | "profile tag is required for Ansible-managed EC2 instances" | EC2 instances with Ansible management |
+
+### Recommended Tags (WARN - DO NOT BLOCK)
+
+These tags should be present but won't block deployment:
+
+| Tag | Validation Pattern | Warning Message |
+|-----|-------------------|-----------------|
+| `version` | `^v[0-9.]+(\-rc[0-9]+)?$` | "version tag recommended. Format: v1.2.3 or v1.2.3-rc1" |
+| `datadog_managed` | `^(true\|false\|critical)$` | "datadog_managed tag recommended for monitoring setup" |
+| `rhythmic_managed` | `^(true\|false)$` | "rhythmic_managed tag recommended for service coverage tracking" |
+
+### Optional Tags (INFO - NO VALIDATION)
+
+These tags are optional and provide additional metadata:
+
+- `component` - Component identifier (alphanumeric with hyphens/underscores)
+- `app` - Application identifier for grouping services
+- `cost_center` - Accounting cost center (must align to customer requirements)
+- `schedule` - External scheduling tag (must align to scheduler values)
+- `backup_policy` - Backup policy identifier
+- `dlm_policy` - Data Lifecycle Management policy identifier
+
+### Bill Tags Identification
+
+Tags marked as "Bill Tags" for cost allocation:
+
+- `env` (Y)
+- `service` (Y)
+- `cost_center` (Y)
+
+## Infrastructure as Code Standards Enforcement
+
+### Tooling Requirements (ENFORCE)
+
+#### Primary Tool Standards
+
+```yaml
+terraform_requirements:
+ version_pinning: REQUIRED
+ validation_pattern: '^[0-9]+\.[0-9]+(\.[0-9]+)?$'
+ error_message: "Terraform version must be pinned in .terraform-version or terraform block"
+
+provider_versions:
+ aws_provider: REQUIRED
+ validation_pattern: '^~> [0-9]+\.[0-9]+$'
+ error_message: "AWS provider version must be pinned to at least minor version"
+```
+
+#### Repository Structure Validation
+
+```yaml
+repository_naming:
+ pattern: '^aws-[a-zA-Z0-9\-]+-[a-zA-Z0-9\-]+$'
+ error_message: "Repository must follow pattern: aws-[client]-[purpose]"
+
+required_files:
+ - ".terraform-version"
+ - "README.md"
+ - ".gitignore"
+ - "main.tf"
+ - "variables.tf"
+ - "outputs.tf"
+```
+
+### Code Quality Standards (ENFORCE)
+
+#### Terraform Formatting
+
+```yaml
+terraform_fmt:
+ enforce: true
+ command: "terraform fmt -check -recursive"
+ error_message: "Code must be formatted with 'terraform fmt'"
+
+terraform_validate:
+ enforce: true
+ command: "terraform validate"
+ error_message: "Terraform configuration must pass validation"
+```
+
+#### Security Scanning
+
+```yaml
+security_tools:
+ checkov:
+ enforce: true
+ severity_threshold: "HIGH"
+ error_message: "High severity security issues must be resolved"
+
+ tflint:
+ enforce: true
+ config_required: true
+ error_message: "TFLint issues must be resolved"
+
+ trivy:
+ enforce: true
+ severity_threshold: "HIGH"
+ error_message: "High severity vulnerabilities must be resolved"
+```
+
+### State Management Standards (ENFORCE)
+
+#### Backend Configuration
+
+```yaml
+terraform_backend:
+ type: "s3"
+ required_settings:
+ bucket: REQUIRED
+ key: REQUIRED
+ region: REQUIRED
+ encrypt: true
+ dynamodb_table: REQUIRED
+
+ validation:
+ bucket_versioning: REQUIRED
+ bucket_encryption: REQUIRED
+ error_message: "S3 backend must have versioning and encryption enabled"
+```
+
+#### State Organization
+
+```yaml
+state_file_rules:
+ max_resources_per_state: 50
+ warning_threshold: 30
+ separate_states_required:
+ - "Critical infrastructure (networking, security)"
+ - "Different environments (prod/staging/dev)"
+ - "Independent workloads"
+```
+
+### GitOps Workflow Standards (ENFORCE)
+
+#### Branch Protection
+
+```yaml
+branch_protection:
+ master_branch:
+ required_reviews: 1
+ dismiss_stale_reviews: true
+ require_code_owner_reviews: false
+ required_status_checks:
+ - "terraform-plan"
+ - "terraform-validate"
+ - "security-scan"
+ restrict_pushes: true
+```
+
+#### Pull Request Requirements
+
+```yaml
+pr_requirements:
+ terraform_plan: REQUIRED
+ plan_in_description: REQUIRED
+ validation_passed: REQUIRED
+ security_scan_passed: REQUIRED
+
+ template_sections:
+ - "## Changes"
+ - "## Business Justification"
+ - "## Terraform Plan Output"
+ - "## Testing Performed"
+```
+
+### Documentation Standards (WARN)
+
+#### README Requirements
+
+```yaml
+readme_sections:
+ required:
+ - "Purpose and scope"
+ - "Prerequisites"
+ - "Usage instructions"
+ - "Variable documentation"
+
+ validation:
+ min_length: 500
+ terraform_docs: REQUIRED
+ warning_message: "README should include all required sections"
+```
+
+#### Module Documentation
+
+```yaml
+module_documentation:
+ terraform_docs: REQUIRED
+ variable_descriptions: REQUIRED
+ output_descriptions: REQUIRED
+ example_usage: REQUIRED
+```
+
+### Resource Naming Standards (ENFORCE)
+
+#### Naming Convention
+
+```yaml
+resource_naming:
+ pattern: "[client]-[env]-[service]-[resource]-[instance]"
+ validation_pattern: '^[a-zA-Z0-9\-]+-[a-zA-Z0-9\-]+-[a-zA-Z0-9\-]+-[a-zA-Z0-9\-]+-[a-zA-Z0-9\-]+$'
+ error_message: "Resources must follow naming pattern: [client]-[env]-[service]-[resource]-[instance]"
+
+naming_exceptions:
+ - "IAM roles and policies (AWS character restrictions)"
+ - "S3 buckets (global uniqueness requirements)"
+ - "Lambda functions (length restrictions)"
+```
+
+## AI Agent Implementation Rules
+
+### Blocking vs Non-Blocking Validations
+
+#### BLOCK DEPLOYMENT (Return Error Code)
+- Missing required tags (`env`, `service`)
+- Missing conditional required tags when applicable
+- Terraform validation failures
+- High severity security issues
+- Unpinned Terraform versions
+- Missing state backend configuration
+- Branch protection violations
+- Failed security scans
+
+#### WARN ONLY (Log Warning, Continue)
+- Missing recommended tags
+- README documentation issues
+- Missing optional documentation
+- Style guide violations
+- Medium/Low severity security findings
+
+#### INFO ONLY (Log Information)
+- Missing optional tags
+- Code optimization suggestions
+- Best practice recommendations
+
+### Validation Order
+
+1. **Repository Structure** - Validate naming and required files
+2. **Terraform Syntax** - Run terraform validate, fmt check
+3. **Security Scanning** - Run checkov, tflint, trivy
+4. **Tag Validation** - Check required and recommended tags
+5. **State Management** - Validate backend configuration
+6. **Documentation** - Check README and module docs
+7. **Naming Conventions** - Validate resource naming patterns
+
+### Error Reporting Format
+
+```json
+{
+ "validation_result": "FAILED|PASSED|WARNING",
+ "blocking_errors": [
+ {
+ "category": "TAGGING|SECURITY|SYNTAX|NAMING",
+ "severity": "HIGH|MEDIUM|LOW",
+ "message": "Detailed error message",
+ "resource": "resource_name",
+ "fix_suggestion": "How to resolve this issue"
+ }
+ ],
+ "warnings": [],
+ "info": []
+}
+```
+
+### Exception Handling
+
+#### Tag Requirement Exceptions
+- Legacy resources (created before standards adoption)
+- Third-party managed resources
+- AWS managed resources
+
+#### Process for Exceptions
+1. Document exception in pull request
+2. Add waiver comment in code
+3. Track exceptions for future remediation
+
+### Continuous Monitoring
+
+#### Drift Detection
+- Monitor for manual changes outside IaC
+- Alert on untagged resources
+- Track compliance metrics
+- Generate compliance reports
+
+#### Metrics to Track
+- Tag compliance percentage
+- Security scan failure rate
+- Documentation coverage
+- Standard adoption rate
+- Exception requests and resolution
+
+This enforcement guide ensures consistent application of infrastructure standards while providing clear guidance for automated validation and manual review processes.
\ No newline at end of file
diff --git a/README.md b/README.md
index 2864cc9..80c209f 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,12 @@
-# terraform-datadog-monitors
+# terraform-datadog-monitor
-[](https://github.com/rhythmictech/terraform-datadog-monitors/actions?query=workflow%3Atflint+event%3Apush+branch%3Amaster)
-[](https://github.com/rhythmictech/terraform-datadog-monitors/actions?query=workflow%3Atfsec+event%3Apush+branch%3Amaster)
-[](https://github.com/rhythmictech/terraform-datadog-monitors/actions?query=workflow%3Ayamllint+event%3Apush+branch%3Amaster)
-[](https://github.com/rhythmictech/terraform-datadog-monitors/actions?query=workflow%3Amisspell+event%3Apush+branch%3Amaster)
-[](https://github.com/rhythmictech/terraform-datadog-monitors/actions?query=workflow%3Apre-commit-check+event%3Apush+branch%3Amaster)
+A collection of semi-opinionated Datadog monitors for a variety of use cases.
+
+[](https://github.com/rhythmictech/terraform-datadog-monitor/actions?query=workflow%3Atflint+event%3Apush+branch%3Amaster)
+[](https://github.com/rhythmictech/terraform-datadog-monitor/actions?query=workflow%3Atfsec+event%3Apush+branch%3Amaster)
+[](https://github.com/rhythmictech/terraform-datadog-monitor/actions?query=workflow%3Ayamllint+event%3Apush+branch%3Amaster)
+[](https://github.com/rhythmictech/terraform-datadog-monitor/actions?query=workflow%3Amisspell+event%3Apush+branch%3Amaster)
+[](https://github.com/rhythmictech/terraform-datadog-monitor/actions?query=workflow%3Apre-commit-check+event%3Apush+branch%3Amaster)
## Requirements
@@ -19,137 +21,13 @@ provider "datadog" {
}
module "monitor" {
- source = "rhythmictech/monitors/datadog/module"
+ source = "rhythmictech/monitors/datadog//aws/ec2"
}
```
## About
-```
-
-
-## Requirements
-
-| Name | Version |
-|------|---------|
-| [terraform](#requirement\_terraform) | ~> 1.5 |
-| [archive](#requirement\_archive) | >= 2.2.0 |
-| [aws](#requirement\_aws) | >= 4.62 |
-| [datadog](#requirement\_datadog) | >= 3.37 |
-| [null](#requirement\_null) | >= 3.1.0 |
-
-## Providers
-
-| Name | Version |
-|------|---------|
-| [archive](#provider\_archive) | >= 2.2.0 |
-| [aws](#provider\_aws) | 5.37.0 |
-| [datadog](#provider\_datadog) | 3.36.1 |
-| [null](#provider\_null) | >= 3.1.0 |
-
-## Modules
-
-| Name | Source | Version |
-|------|--------|---------|
-| [tags](#module\_tags) | rhythmictech/tags/terraform | ~> 1.1 |
-
-## Resources
-
-| Name | Type |
-|------|------|
-| [aws_cloudformation_stack.datadog_forwarder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudformation_stack) | resource |
-| [aws_cloudwatch_event_rule.awshealth](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
-| [aws_cloudwatch_event_rule.guardduty](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
-| [aws_cloudwatch_event_target.awshealth](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
-| [aws_cloudwatch_event_target.guardduty](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
-| [aws_cloudwatch_log_subscription_filter.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_subscription_filter) | resource |
-| [aws_cur_report_definition.cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cur_report_definition) | resource |
-| [aws_iam_policy.datadog](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
-| [aws_iam_policy.datadog_cost_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
-| [aws_iam_policy.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
-| [aws_iam_role.datadog](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
-| [aws_iam_role.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
-| [aws_iam_role_policy_attachment.cspm](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
-| [aws_iam_role_policy_attachment.datadog](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
-| [aws_iam_role_policy_attachment.datadog_cost_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
-| [aws_iam_role_policy_attachment.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
-| [aws_lambda_function.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
-| [aws_lambda_permission.awshealth_trigger](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
-| [aws_lambda_permission.cloudtrail_trigger](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
-| [aws_lambda_permission.guardduty_trigger](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
-| [aws_s3_bucket.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource |
-| [aws_s3_bucket_lifecycle_configuration.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration) | resource |
-| [aws_s3_bucket_notification.cloudtrail_notification](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_notification) | resource |
-| [aws_s3_bucket_policy.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy) | resource |
-| [aws_s3_bucket_public_access_block.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_public_access_block) | resource |
-| [aws_s3_bucket_server_side_encryption_configuration.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_server_side_encryption_configuration) | resource |
-| [aws_s3_bucket_versioning.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_versioning) | resource |
-| [aws_secretsmanager_secret.datadog](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret) | resource |
-| [aws_secretsmanager_secret_version.datadog](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret_version) | resource |
-| [datadog_api_key.datadog](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/api_key) | resource |
-| [datadog_integration_aws.datadog](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/integration_aws) | resource |
-| [datadog_integration_aws_lambda_arn.datadog_forwarder](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/integration_aws_lambda_arn) | resource |
-| [datadog_integration_aws_log_collection.datadog_forwarder](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/integration_aws_log_collection) | resource |
-| [datadog_logs_custom_pipeline.health](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/logs_custom_pipeline) | resource |
-| [datadog_logs_index.main](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/logs_index) | resource |
-| [datadog_monitor.anomaly_usage](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource |
-| [datadog_monitor.forecast_usage](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource |
-| [null_resource.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
-| [archive_file.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source |
-| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
-| [aws_iam_policy_document.assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
-| [aws_iam_policy_document.datadog_cost_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
-| [aws_iam_policy_document.local_cur](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
-| [aws_iam_policy_document.rds_enhanced_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
-| [aws_iam_policy_document.rds_enhanced_monitoring_assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
-| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
-
-## Inputs
-
-| Name | Description | Type | Default | Required |
-|------|-------------|------|---------|:--------:|
-| [cloudtrail\_buckets](#input\_cloudtrail\_buckets) | Bucket(s) to collect CloudTrail logs from | `list(string)` | `[]` | no |
-| [cur\_bucket\_suffix](#input\_cur\_bucket\_suffix) | Suffix to append to the CUR bucket name ([ACCOUNT\_ID]-[REGION]-[cur\_bucket\_suffix]) | `string` | `"datadog-cur-data"` | no |
-| [datadog\_account\_id](#input\_datadog\_account\_id) | DataDog AWS account ID (should not need changed) | `string` | `"464622532012"` | no |
-| [datadog\_site\_name](#input\_datadog\_site\_name) | DataDog site (e.g., datadoghq.com) | `string` | `"datadoghq.com"` | no |
-| [enable\_cspm\_resource\_collection](#input\_enable\_cspm\_resource\_collection) | Whether Datadog collects cloud security posture management resources from your AWS account. This includes additional resources not covered under the general resource\_collection. | `bool` | `false` | no |
-| [enable\_cur\_collection](#input\_enable\_cur\_collection) | Configure a Cost and Usage Reporting export (uses legacy CUR) suitable for ingestion by Datadog. This does not fully configure Datadog due to lack of Terraform support but does do everything on the AWS side to prepare for enabling cost monitoring in Datadog. | `bool` | `false` | no |
-| [enable\_estimated\_usage\_detection](#input\_enable\_estimated\_usage\_detection) | Enable estimated usage anomaly and forecast monitoring | `bool` | `false` | no |
-| [enable\_guardduty\_notifications](#input\_enable\_guardduty\_notifications) | Send GuardDuty notifications to Datadog (`install_log_forwarder` must be true). This routes GuardDuty events to the log forwarder. GuardDuty events can also be received as a Datadog Event through Cloud Security Monitoring. | `bool` | `true` | no |
-| [enable\_health\_notifications](#input\_enable\_health\_notifications) | Send AWS health notifications to Datadog (`install_log_forwarder` must be true). This routes AWS Health events to the log forwarder. Health events can also be received as a Datadog Event through the AWS Health integration. | `bool` | `true` | no |
-| [enable\_rds\_enhanced\_monitoring\_lambda](#input\_enable\_rds\_enhanced\_monitoring\_lambda) | Install the RDS Enhanced Monitoring Lambda | `bool` | `false` | no |
-| [enable\_resource\_collection](#input\_enable\_resource\_collection) | Enable or disable resource collection | `bool` | `true` | no |
-| [estimated\_usage\_anomaly\_message](#input\_estimated\_usage\_anomaly\_message) | Message for usage anomaly alerts | `string` | `"Datadog usage anomaly detected"` | no |
-| [estimated\_usage\_detection\_config](#input\_estimated\_usage\_detection\_config) | Map of usage types to monitor. | `map(any)` | `{}` | no |
-| [estimated\_usage\_detection\_default\_config](#input\_estimated\_usage\_detection\_default\_config) | Map of default usage monitoring settings for each metric type. All are disabled by default. Use `usage_anomaly_services` to enable services and alternately override default settings |
map(object({
anomaly_enabled = bool
anomaly_span = string
anomaly_threshold = number
anomaly_window = string
anomaly_deviations = number
anomaly_seasonality = string
anomaly_rollup = number
forecast_enabled = bool
forecast_deviations = number
forecast_rollup_type = string
forecast_rollup_value = number
forecast_threshold = number
})) | {
"hosts": {
"anomaly_deviations": 1,
"anomaly_enabled": false,
"anomaly_rollup": 600,
"anomaly_seasonality": "daily",
"anomaly_span": "last_1d",
"anomaly_threshold": 0.15,
"anomaly_window": "last_1h",
"forecast_deviations": 1,
"forecast_enabled": false,
"forecast_rollup_type": "avg",
"forecast_rollup_value": 300,
"forecast_threshold": 1000
},
"logs_indexed": {
"anomaly_deviations": 2,
"anomaly_enabled": false,
"anomaly_rollup": 60,
"anomaly_seasonality": "hourly",
"anomaly_span": "last_1d",
"anomaly_threshold": 0.15,
"anomaly_window": "last_1h",
"forecast_deviations": 1,
"forecast_enabled": false,
"forecast_rollup_type": "sum",
"forecast_rollup_value": 86400,
"forecast_threshold": 1000
},
"logs_ingested": {
"anomaly_deviations": 2,
"anomaly_enabled": false,
"anomaly_rollup": 60,
"anomaly_seasonality": "hourly",
"anomaly_span": "last_1d",
"anomaly_threshold": 0.15,
"anomaly_window": "last_1h",
"forecast_deviations": 1,
"forecast_enabled": false,
"forecast_rollup_type": "sum",
"forecast_rollup_value": 86400,
"forecast_threshold": 1000
}
} | no |
-| [install\_log\_forwarder](#input\_install\_log\_forwarder) | controls whether log forwarder lambda should be installed | `bool` | `true` | no |
-| [integration\_default\_namespace\_rules](#input\_integration\_default\_namespace\_rules) | Set all services to disabled by default. | `map(bool)` | {
"api_gateway": false,
"application_elb": false,
"apprunner": false,
"appstream": false,
"appsync": false,
"athena": false,
"auto_scaling": false,
"backup": false,
"bedrock": false,
"billing": false,
"budgeting": false,
"certificatemanager": false,
"cloudfront": false,
"cloudhsm": false,
"cloudsearch": false,
"cloudwatch_events": false,
"cloudwatch_logs": false,
"codebuild": false,
"codewhisperer": false,
"cognito": false,
"collect_custom_metrics": false,
"connect": false,
"crawl_alarms": false,
"directconnect": false,
"dms": false,
"documentdb": false,
"dynamodb": false,
"dynamodbaccelerator": false,
"ebs": false,
"ec2": false,
"ec2api": false,
"ec2spot": false,
"ecr": false,
"ecs": false,
"efs": false,
"elasticache": false,
"elasticbeanstalk": false,
"elasticinference": false,
"elastictranscoder": false,
"elb": false,
"emr": false,
"es": false,
"firehose": false,
"fsx": false,
"gamelift": false,
"globalaccelerator": false,
"glue": false,
"inspector": false,
"iot": false,
"keyspaces": false,
"kinesis": false,
"kinesis_analytics": false,
"kms": false,
"lambda": false,
"lex": false,
"mediaconnect": false,
"mediaconvert": false,
"medialive": false,
"mediapackage": false,
"mediastore": false,
"mediatailor": false,
"memorydb": false,
"ml": false,
"mq": false,
"msk": false,
"mwaa": false,
"nat_gateway": false,
"neptune": false,
"network_elb": false,
"networkfirewall": false,
"networkmonitor": false,
"opsworks": false,
"polly": false,
"privatelinkendpoints": false,
"privatelinkservices": false,
"rds": false,
"rdsproxy": false,
"redshift": false,
"rekognition": false,
"route53": false,
"route53resolver": false,
"s3": false,
"s3storagelens": false,
"sagemaker": false,
"sagemakerendpoints": false,
"sagemakerlabelingjobs": false,
"sagemakermodelbuildingpipeline": false,
"sagemakerprocessingjobs": false,
"sagemakertrainingjobs": false,
"sagemakertransformjobs": false,
"sagemakerworkteam": false,
"service_quotas": false,
"ses": false,
"shield": false,
"sns": false,
"sqs": false,
"step_functions": false,
"storage_gateway": false,
"swf": false,
"textract": false,
"transitgateway": false,
"translate": false,
"trusted_advisor": false,
"usage": false,
"vpn": false,
"waf": false,
"wafv2": false,
"workspaces": false,
"xray": false
} | no |
-| [integration\_excluded\_regions](#input\_integration\_excluded\_regions) | Regions to exclude from DataDog monitoring | `list(string)` | `[]` | no |
-| [integration\_filter\_tags](#input\_integration\_filter\_tags) | Tags to filter EC2 instances on (see https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/integration_aws) | `list(string)` | `[]` | no |
-| [integration\_host\_tags](#input\_integration\_host\_tags) | Tags to apply to instances (see https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/integration_aws) | `list(string)` | `[]` | no |
-| [integration\_namespace\_rules](#input\_integration\_namespace\_rules) | Map of AWS services to allow in the integration. Defaults to none. | `map(bool)` | `{}` | no |
-| [log\_forwarder\_sources](#input\_log\_forwarder\_sources) | List of services to automatically ingest all logs from (see https://docs.datadoghq.com/api/latest/aws-logs-integration/#get-list-of-aws-log-ready-services) | `list(string)` | `[]` | no |
-| [logs\_main\_index\_daily\_limit](#input\_logs\_main\_index\_daily\_limit) | Daily log limit for the main index (only used if `logs_manage_main_index == true`) | `number` | `null` | no |
-| [logs\_main\_index\_daily\_limit\_reset\_offset](#input\_logs\_main\_index\_daily\_limit\_reset\_offset) | The reset time timezone offset for the daily limit of the main logs index (specify as +HH:MM or -HH:MM) | `string` | `"+00:00"` | no |
-| [logs\_main\_index\_daily\_limit\_reset\_time](#input\_logs\_main\_index\_daily\_limit\_reset\_time) | The reset time for the daily limit of the main logs index (specify as HH:MM) | `string` | `"00:00"` | no |
-| [logs\_main\_index\_daily\_limit\_warn\_threshold](#input\_logs\_main\_index\_daily\_limit\_warn\_threshold) | Warning threshold for daily log volume for the main index (only used if `logs_manage_main_index == true`) | `number` | `0.9` | no |
-| [logs\_main\_index\_exclusion\_filters](#input\_logs\_main\_index\_exclusion\_filters) | A list of maps defining exclusion filters for the main index | list(object({
name = string
is_enabled = bool
filter = object({
query = string
sample_rate = number
})
})) | `[]` | no |
-| [logs\_main\_index\_retention\_days](#input\_logs\_main\_index\_retention\_days) | The number of days to retain logs in the main index (only used if `logs_manage_main_index == true`) | `number` | `15` | no |
-| [logs\_manage\_main\_index](#input\_logs\_manage\_main\_index) | A boolean flag to manage the main Datadog logs index | `bool` | `false` | no |
-| [name](#input\_name) | Moniker to apply to all resources in the module | `string` | n/a | yes |
-| [rds\_enhanced\_monitoring\_forwarder\_version](#input\_rds\_enhanced\_monitoring\_forwarder\_version) | Version of the Datadog RDS enhanced monitoring lambda to use (module is only tested against the default version) | `string` | `"3.103.0"` | no |
-| [tags](#input\_tags) | User-Defined tags | `map(string)` | `{}` | no |
-| [use\_full\_permissions](#input\_use\_full\_permissions) | Controls whether DataDog is given full permissions or core permissions. Generally you want full. | `bool` | `true` | no |
-
-## Outputs
-
-| Name | Description |
-|------|-------------|
-| [iam\_role\_datadog](#output\_iam\_role\_datadog) | IAM role assumed by Datadog resources |
-| [lambda\_arn\_forwarder](#output\_lambda\_arn\_forwarder) | DataDog Lambda Forwarder ARN |
-
-
## Requirements
diff --git a/aws/acm/.terraform.lock.hcl b/aws/acm/.terraform.lock.hcl
new file mode 100644
index 0000000..bda5b8d
--- /dev/null
+++ b/aws/acm/.terraform.lock.hcl
@@ -0,0 +1,44 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/datadog/datadog" {
+ version = "3.76.0"
+ constraints = ">= 3.37.0"
+ hashes = [
+ "h1:WDN2Ar3dt3s35zy1jng7O28gDRaGZgLqqDzvb9pAtFo=",
+ "zh:055bfca2ab3d987fb1a6827c27673d08d50d7c6026be020db0176722fe73715a",
+ "zh:0568ce6217ca42c06d0d3d7788194d806c415f3ed831285751c245aa19377d3a",
+ "zh:48a82913ff629d3e20db815f98c916493abda8fef5c8a0258401e5122a5823d9",
+ "zh:893f70d972afbaf92f4d7c2d8e0cc2d542ac4fa46931ff7ef176dc53948fe985",
+ "zh:9213a9ad4f0a0806cf048c0a73fcf4abaf4b5a0459b5a82f5ce15e567f24fddf",
+ "zh:a22d66b33f372b703cc6e680880689ad7f46473ea402e2d82c1167793e953191",
+ "zh:a9606223ad215174a871cfc49e97aee04fbad4b1d444f6f00c49f76682596ddf",
+ "zh:adede6b1f1536b90339288b2741b0cdde3f5e26107bedef1ac04e15d164a3d9a",
+ "zh:c4597d7bb5fb9c17c3a11d58d1aa052f004cac080176ee995ee8a04ac8f9a2d3",
+ "zh:c4a64ca1ba1b58b90466c51591327d09214022dd47885fada20655b7e9830074",
+ "zh:d3f28181e605090b96d0059f201209649bde557feec2bd5955538dae88cc1a20",
+ "zh:d40402ba3289816f933895f009a04f525894417d6fc839c19cb12e612a9fea0b",
+ "zh:e7308e1315e9328f0c3ae9c631231bf71541b8220b5a9b3be32e00990b3b344d",
+ "zh:eb8189220c79020cff0033aa433332999b35b8c0a85a0a6afbd6a663b7211e0b",
+ ]
+}
+
+provider "registry.terraform.io/hashicorp/null" {
+ version = "3.2.4"
+ constraints = ">= 3.1.0"
+ hashes = [
+ "h1:L5V05xwp/Gto1leRryuesxjMfgZwjb7oool4WS1UEFQ=",
+ "zh:59f6b52ab4ff35739647f9509ee6d93d7c032985d9f8c6237d1f8a59471bbbe2",
+ "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
+ "zh:795c897119ff082133150121d39ff26cb5f89a730a2c8c26f3a9c1abf81a9c43",
+ "zh:7b9c7b16f118fbc2b05a983817b8ce2f86df125857966ad356353baf4bff5c0a",
+ "zh:85e33ab43e0e1726e5f97a874b8e24820b6565ff8076523cc2922ba671492991",
+ "zh:9d32ac3619cfc93eb3c4f423492a8e0f79db05fec58e449dee9b2d5873d5f69f",
+ "zh:9e15c3c9dd8e0d1e3731841d44c34571b6c97f5b95e8296a45318b94e5287a6e",
+ "zh:b4c2ab35d1b7696c30b64bf2c0f3a62329107bd1a9121ce70683dec58af19615",
+ "zh:c43723e8cc65bcdf5e0c92581dcbbdcbdcf18b8d2037406a5f2033b1e22de442",
+ "zh:ceb5495d9c31bfb299d246ab333f08c7fb0d67a4f82681fbf47f2a21c3e11ab5",
+ "zh:e171026b3659305c558d9804062762d168f50ba02b88b231d20ec99578a6233f",
+ "zh:ed0fe2acdb61330b01841fa790be00ec6beaac91d41f311fb8254f74eb6a711f",
+ ]
+}
diff --git a/aws/acm/README.md b/aws/acm/README.md
new file mode 100644
index 0000000..7e764a4
--- /dev/null
+++ b/aws/acm/README.md
@@ -0,0 +1,65 @@
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | ~> 1.5 |
+| [datadog](#requirement\_datadog) | >= 3.37 |
+| [null](#requirement\_null) | >= 3.1.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [datadog](#provider\_datadog) | 3.76.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [datadog_monitor.certificate_renewal_failure_check](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [additional\_tags](#input\_additional\_tags) | Additional tags (key:value format) to add to this type of check (combined with `local.tags` and `var.base_tags`) | `list(string)` | `[]` | no |
+| [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+| [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no |
+| [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` | [| no | +| [certificate\_renewal\_failure\_check\_enabled](#input\_certificate\_renewal\_failure\_check\_enabled) | Whether to enable the certificate renewal failure check | `bool` | `true` | no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [Datadog Docs](https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:acm"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + \ No newline at end of file diff --git a/aws/acm/common.tf b/aws/acm/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/aws/acm/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/aws/acm/main.tf b/aws/acm/main.tf new file mode 100644 index 0000000..a1c84be --- /dev/null +++ b/aws/acm/main.tf @@ -0,0 +1,30 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "certificate_renewal_failure_check" { + count = var.certificate_renewal_failure_check_enabled ? 
1 : 0 + + name = join("", [local.title_prefix, "ACM - Certificate Renewal Failure", local.title_suffix]) + type = "event-v2 alert" + message = local.event_alert_base_message + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + include_tags = false + + evaluation_delay = var.evaluation_delay + new_group_delay = var.new_group_delay + + query = <<-EOQ + events("source:amazon_acm").rollup("count").by("@aggregation_key,env").last("5m") > 0 + EOQ + + monitor_thresholds { + critical = 0 + } +} diff --git a/aws/acm/variables.tf b/aws/acm/variables.tf new file mode 100644 index 0000000..b406c50 --- /dev/null +++ b/aws/acm/variables.tf @@ -0,0 +1,23 @@ +######################################## +# Global variables +######################################## +variable "additional_tags" { + default = [] + description = "Additional tags (key:value format) to add to this type of check (combined with `local.tags` and `var.base_tags`)" + type = list(string) +} + +variable "base_tags" { + default = ["resource:acm"] + description = "Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this)" + type = list(string) +} + +######################################## +# Certificate Renewal Failure Check +######################################## +variable "certificate_renewal_failure_check_enabled" { + default = true + description = "Whether to enable the certificate renewal failure check" + type = bool +} diff --git a/aws/acm/versions.tf b/aws/acm/versions.tf new file mode 120000 index 0000000..cbeda73 --- /dev/null +++ b/aws/acm/versions.tf @@ -0,0 +1 @@ +../../common/versions.tf \ No newline at end of file diff --git a/aws/alb/.terraform.lock.hcl b/aws/alb/.terraform.lock.hcl new file mode 100644 index 0000000..73aa0ff --- /dev/null +++ b/aws/alb/.terraform.lock.hcl @@ -0,0 +1,46 @@ +# This file is maintained automatically by "terraform init". 
+# Manual edits may be lost in future updates. + +provider "registry.terraform.io/datadog/datadog" { + version = "3.37.0" + constraints = ">= 3.37.0" + hashes = [ + "h1:D5lzAUqnEF3458Fu5wxNc4KaCy46CgcPIHM+b1e71YU=", + "h1:yAUN0kckUMwxTwy/uD5co/bQ5oAJyGzo3UWLDcH26Yc=", + "zh:0bf149256e2166d77a7cea60adcf1486bf18c65120caccba3f9caf643a0937a7", + "zh:246c40bbb198f918a7380c059ac2e9110832215e0e00c06fdd8a5c34fb01f797", + "zh:3ff8667f41e9f787de6dae59c3ee447177c062613787368e7f8ae0b1efdbe25f", + "zh:47e9008c9aa0e58e0c7eb71cac0408a74399180961cd9f23807fedb3a52b4df9", + "zh:620f32bfcdfbc0ce2316d5e3b5587cdbace901706d08be70f591ef22f0cda40b", + "zh:760fc52ff88ff34f2e7ed9f2d78dc2f7509c06faa133b811bd74fdb92032ae19", + "zh:805ae94bbec4a213859bbb0f3d98a013792f55b782996fecc40b39933b83afed", + "zh:88b040e6cd964e9bef62b5eaeed8330377a40fc197de43df842741fea241bc78", + "zh:8dee6c95f23479bb7469c2648351a8f42f696deeab59287385f2ac32868e444f", + "zh:bb0c18c9e85d19cf176999e0e636a5461c2a041ed7cfe560632b9bba3eb3085b", + "zh:d4f133a24d0881c772d73ab4b4b5e7f28a02a26eca1674c5c77a9bd983151ad7", + "zh:dc488ef450760162f3d4306497a065649fe65967eadd2a7a9a2239f4ba751a9c", + "zh:dcd88c3747c9a348c317b4a1ecadb7756de879085c6014cf368d63ce60526176", + "zh:f0b1eb479c13fd0a4f724e7ada1bd08fc69032ecc866619800e2742e23882730", + ] +} + +provider "registry.terraform.io/hashicorp/null" { + version = "3.2.2" + constraints = ">= 3.1.0" + hashes = [ + "h1:IMVAUHKoydFrlPrl9OzasDnw/8ntZFerCC9iXw1rXQY=", + "h1:vWAsYRd7MjYr3adj8BVKRohVfHpWQdvkIwUQ2Jf5FVM=", + "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", + "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", + "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3", + "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606", + "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546", + "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539", + 
"zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422", + "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae", + "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1", + "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e", + ] +} diff --git a/aws/alb/README.md b/aws/alb/README.md index ec3899f..44f2709 100644 --- a/aws/alb/README.md +++ b/aws/alb/README.md @@ -20,7 +20,7 @@ Configures the following for ALBs based on tags matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -46,23 +46,26 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | +| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `true` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | 
+| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `true` | no | | [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | +| [http\_5xx\_tg\_responses\_use\_message](#input\_http\_5xx\_tg\_responses\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | +| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `true` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | +| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `3` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| 
[latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | @@ -71,13 +74,17 @@ No modules. | [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [no\_healthy\_instances\_threshold\_critical](#input\_no\_healthy\_instances\_threshold\_critical) | Critical threshold (percentage) | `number` | `0` | no | | [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | +| [no\_healthy\_instances\_use\_message](#input\_no\_healthy\_instances\_use\_message) | Whether to use the query alert base message | `bool` | `true` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data 
(uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/alb/main.tf b/aws/alb/main.tf index 928c5a5..e5449ca 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -4,16 +4,16 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "ALB 5xx Responses - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "ALB 5xx Responses - {{loadbalancer.name}}", local.title_suffix]) + include_tags = false + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = <
"resource:alb"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `0.75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `0.25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx responses monitor | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), 
or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message for the latency monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses 
`notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/apigateway/main.tf b/aws/apigateway/main.tf index 40bbc20..f624851 100644 --- a/aws/apigateway/main.tf +++ b/aws/apigateway/main.tf @@ -4,16 +4,16 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "API Gateway 5xx Responses - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "API Gateway 5xx Responses - {{apiname.name}}", local.title_suffix]) + include_tags = false + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = <
"resource:apigateway"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [health\_enabled](#input\_health\_enabled) | Enable Beanstalk health monitor (requires enhanced metrics) | `bool` | `false` | no | | [health\_evaluation\_window](#input\_health\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_5m"` | no | | [health\_no\_data\_window](#input\_health\_no\_data\_window) | No date threshold (minutes) | `number` | `20` | no | | [health\_threshold\_critical](#input\_health\_threshold\_critical) | Critical threshold (
"resource:beanstalk"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| 
[notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | -| [status\_failed\_check\_enabled](#input\_status\_failed\_check\_enabled) | Enable ec2 instance status check monitor | `bool` | `false` | no | +| [status\_failed\_check\_enabled](#input\_status\_failed\_check\_enabled) | Enable ec2 instance status check monitor | `bool` | `true` | no | | [status\_failed\_check\_evaluation\_window](#input\_status\_failed\_check\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_check\_no\_data\_window](#input\_status\_failed\_check\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| 
[status\_failed\_check\_threshold\_critical](#input\_status\_failed\_check\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_check\_threshold\_warning](#input\_status\_failed\_check\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_instance\_enabled](#input\_status\_failed\_instance\_enabled) | Enable instance status check monitor | `bool` | `false` | no | +| [status\_failed\_check\_use\_message](#input\_status\_failed\_check\_use\_message) | Whether to use the query alert base message for ec2 instance status check monitor | `bool` | `false` | no | +| [status\_failed\_instance\_enabled](#input\_status\_failed\_instance\_enabled) | Enable instance status check monitor | `bool` | `true` | no | | [status\_failed\_instance\_evaluation\_window](#input\_status\_failed\_instance\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_instance\_no\_data\_window](#input\_status\_failed\_instance\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_instance\_threshold\_critical](#input\_status\_failed\_instance\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_instance\_threshold\_warning](#input\_status\_failed\_instance\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_system\_enabled](#input\_status\_failed\_system\_enabled) | Enable instance system failure monitor | `bool` | `false` | no | +| [status\_failed\_instance\_use\_message](#input\_status\_failed\_instance\_use\_message) | Whether to use the query alert base message for instance status check monitor | `bool` | `false` | no | +| [status\_failed\_system\_enabled](#input\_status\_failed\_system\_enabled) | Enable 
instance system failure monitor | `bool` | `true` | no | | [status\_failed\_system\_evaluation\_window](#input\_status\_failed\_system\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_system\_no\_data\_window](#input\_status\_failed\_system\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_system\_threshold\_critical](#input\_status\_failed\_system\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_system\_threshold\_warning](#input\_status\_failed\_system\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_volume\_enabled](#input\_status\_failed\_volume\_enabled) | Enable attached volume status monitor | `bool` | `false` | no | +| [status\_failed\_system\_use\_message](#input\_status\_failed\_system\_use\_message) | Whether to use the query alert base message for instance system failure monitor | `bool` | `false` | no | +| [status\_failed\_volume\_enabled](#input\_status\_failed\_volume\_enabled) | Enable attached volume status monitor | `bool` | `true` | no | | [status\_failed\_volume\_evaluation\_window](#input\_status\_failed\_volume\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_volume\_no\_data\_window](#input\_status\_failed\_volume\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_volume\_threshold\_critical](#input\_status\_failed\_volume\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_volume\_threshold\_warning](#input\_status\_failed\_volume\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | 
`25` | no | +| [status\_failed\_volume\_use\_message](#input\_status\_failed\_volume\_use\_message) | Whether to use the query alert base message for attached volume status monitor | `bool` | `false` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | diff --git a/aws/ec2/main.tf b/aws/ec2/main.tf index c75eefa..337c979 100644 --- a/aws/ec2/main.tf +++ b/aws/ec2/main.tf @@ -4,31 +4,30 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "status_failed_check" { count = var.status_failed_check_enabled ? 1 : 0 - name = join("", [local.title_prefix, "EC2 instance status - status check failure - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "EC2 instance status - status check failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) + include_tags = false + message = var.status_failed_check_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" evaluation_delay = var.evaluation_delay new_group_delay = var.new_group_delay - notify_no_data = var.notify_no_data - no_data_timeframe = var.status_failed_check_no_data_window + notify_no_data = false renotify_interval = var.renotify_interval require_full_window = true timeout_h = var.timeout_h query = <
"resource:ec2"
]
[| no | -| [cluster\_health\_red\_enabled](#input\_cluster\_health\_red\_enabled) | Enable cluster health\_red monitor | `bool` | `false` | no | +| [cluster\_health\_red\_enabled](#input\_cluster\_health\_red\_enabled) | Enable cluster health\_red monitor | `bool` | `true` | no | | [cluster\_health\_red\_evaluation\_window](#input\_cluster\_health\_red\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_red\_no\_data\_window](#input\_cluster\_health\_red\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [cluster\_health\_yellow\_enabled](#input\_cluster\_health\_yellow\_enabled) | Enable cluster health monitor | `bool` | `false` | no | +| [cluster\_health\_red\_use\_message](#input\_cluster\_health\_red\_use\_message) | Whether to use the query alert base message for cluster health red monitor | `bool` | `true` | no | +| [cluster\_health\_yellow\_enabled](#input\_cluster\_health\_yellow\_enabled) | Enable cluster health monitor | `bool` | `true` | no | | [cluster\_health\_yellow\_evaluation\_window](#input\_cluster\_health\_yellow\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_yellow\_no\_data\_window](#input\_cluster\_health\_yellow\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cluster\_health\_yellow\_use\_message](#input\_cluster\_health\_yellow\_use\_message) | Whether to use the query alert base message for cluster health yellow monitor | `bool` | `false` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [cpu\_utilization\_anomaly\_deviations](#input\_cpu\_utilization\_anomaly\_deviations) | Standard deviations | `number` | `4` | 
no | | [cpu\_utilization\_anomaly\_enabled](#input\_cpu\_utilization\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | @@ -62,29 +64,35 @@ No modules. | [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | -| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `false` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `true` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `0.9` | no | -| [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `0.8` | no | +| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | 
`number` | `90` | no | +| [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `false` | no | +| [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `true` | no | | [free\_storage\_evaluation\_window](#input\_free\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [free\_storage\_no\_data\_window](#input\_free\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold (GB) | `number` | `null` | no | -| [free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold (GB) | `number` | `null` | no | +| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold for used disk space (%) | `number` | `90` | no | +| 
[free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold for used disk space (%) | `number` | `80` | no | +| [free\_storage\_use\_message](#input\_free\_storage\_use\_message) | Whether to use the query alert base message for free storage monitor | `bool` | `true` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | 
[notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/elasticsearch/main.tf b/aws/elasticsearch/main.tf index bd80c24..479754c 100644 --- a/aws/elasticsearch/main.tf +++ b/aws/elasticsearch/main.tf @@ -4,16 +4,16 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "cluster_health_red" { count = var.cluster_health_red_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ElasticSearch cluster health red - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "ElasticSearch cluster health red - {{name.name}}", local.title_suffix]) + include_tags = false + message = var.cluster_health_red_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "cluster_health_red" { query = <
"resource:elasticsearch"
]
[| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"resource:alb"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [http\_5xx\_backend\_responses\_enabled](#input\_http\_5xx\_backend\_responses\_enabled) | Enable HTTP 5xx response monitor (backend) | `bool` | `false` | no | +| [http\_5xx\_backend\_responses\_evaluation\_window](#input\_http\_5xx\_backend\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [http\_5xx\_backend\_responses\_no\_data\_window](#input\_http\_5xx\_backend\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [http\_5xx\_backend\_responses\_threshold\_critical](#input\_http\_5xx\_backend\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | +| [http\_5xx\_backend\_responses\_threshold\_warning](#input\_http\_5xx\_backend\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_backend\_responses\_use\_message](#input\_http\_5xx\_backend\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx backend responses monitor | `bool` | `false` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | 
no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx responses monitor | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation 
window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message for latency monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | | [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | +| [no\_healthy\_instances\_threshold\_critical](#input\_no\_healthy\_instances\_threshold\_critical) | Critical threshold (percentage) | `number` | `0` | 
no | +| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | +| [no\_healthy\_instances\_use\_message](#input\_no\_healthy\_instances\_use\_message) | Whether to use the query alert base message for no healthy instances monitor | `bool` | `true` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an 
alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/elb/main.tf b/aws/elb/main.tf index 9dbb696..dfce887 100644 --- a/aws/elb/main.tf +++ b/aws/elb/main.tf @@ -4,16 +4,16 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ELB 5xx Responses - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "ELB 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) + include_tags = false + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = <
"resource:lb"
]
[| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"resource:alb"
]
[| no | +| [cold\_starts\_enabled](#input\_cold\_starts\_enabled) | Enable cold starts monitor (requires enhanced metrics) | `bool` | `false` | no | +| [cold\_starts\_evaluation\_window](#input\_cold\_starts\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_4h"` | no | +| [cold\_starts\_no\_data\_window](#input\_cold\_starts\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [cold\_starts\_threshold\_critical](#input\_cold\_starts\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [cold\_starts\_threshold\_warning](#input\_cold\_starts\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [cold\_starts\_use\_message](#input\_cold\_starts\_use\_message) | Whether to use the query alert base message for cold starts monitor | `bool` | `false` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [error\_rate\_enabled](#input\_error\_rate\_enabled) | Enable Lambda error rate monitor | `bool` | `true` | no | +| [error\_rate\_evaluation\_window](#input\_error\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [error\_rate\_no\_data\_window](#input\_error\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [error\_rate\_threshold\_critical](#input\_error\_rate\_threshold\_critical) | Critical 
threshold (percentage, 0-100) | `number` | `75` | no | +| [error\_rate\_threshold\_warning](#input\_error\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [error\_rate\_use\_message](#input\_error\_rate\_use\_message) | Whether to use the query alert base message for error rate monitor | `bool` | `true` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | 
-| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [iterator\_age\_enabled](#input\_iterator\_age\_enabled) | Enable iterator age monitor | `bool` | `false` | no | +| [iterator\_age\_evaluation\_window](#input\_iterator\_age\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_1h"` | no | +| [iterator\_age\_forecast\_enabled](#input\_iterator\_age\_forecast\_enabled) | Enable iterator age forecast monitor | `bool` | `false` | no | +| [iterator\_age\_forecast\_evaluation\_window](#input\_iterator\_age\_forecast\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_1d"` | no | +| [iterator\_age\_forecast\_no\_data\_window](#input\_iterator\_age\_forecast\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| 
[iterator\_age\_forecast\_use\_message](#input\_iterator\_age\_forecast\_use\_message) | Whether to use the query alert base message for iterator age forecast monitor | `bool` | `false` | no | +| [iterator\_age\_no\_data\_window](#input\_iterator\_age\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [iterator\_age\_threshold\_critical](#input\_iterator\_age\_threshold\_critical) | Critical threshold (milliseconds) | `number` | `86400000` | no | +| [iterator\_age\_threshold\_warning](#input\_iterator\_age\_threshold\_warning) | Warning threshold (milliseconds) | `number` | `null` | no | +| [iterator\_age\_use\_message](#input\_iterator\_age\_use\_message) | Whether to use the query alert base message for iterator age monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod 
alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [out\_of\_memory\_enabled](#input\_out\_of\_memory\_enabled) | Enable out of memory monitor (requires enhanced metrics) | `bool` | `true` | no | +| [out\_of\_memory\_evaluation\_window](#input\_out\_of\_memory\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_4h"` | no | +| [out\_of\_memory\_no\_data\_window](#input\_out\_of\_memory\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [out\_of\_memory\_threshold\_critical](#input\_out\_of\_memory\_threshold\_critical) | Critical threshold (count) | `number` | `5` | no | +| [out\_of\_memory\_threshold\_warning](#input\_out\_of\_memory\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [out\_of\_memory\_use\_message](#input\_out\_of\_memory\_use\_message) | Whether to use the query alert base message for out of memory monitor | `bool` | `false` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored 
resource (leave blank to omit tag) | `string` | `null` | no | +| [throttle\_rate\_enabled](#input\_throttle\_rate\_enabled) | Enable Lambda throttle rate monitor | `bool` | `true` | no | +| [throttle\_rate\_evaluation\_window](#input\_throttle\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [throttle\_rate\_no\_data\_window](#input\_throttle\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [throttle\_rate\_threshold\_critical](#input\_throttle\_rate\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | +| [throttle\_rate\_threshold\_warning](#input\_throttle\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [throttle\_rate\_use\_message](#input\_throttle\_rate\_use\_message) | Whether to use the query alert base message for throttle rate monitor | `bool` | `false` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [timeouts\_enabled](#input\_timeouts\_enabled) | Enable timeout count monitor | `bool` | `true` | no | +| [timeouts\_evaluation\_window](#input\_timeouts\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [timeouts\_no\_data\_window](#input\_timeouts\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [timeouts\_threshold\_critical](#input\_timeouts\_threshold\_critical) | Critical threshold (count) | `number` | `75` | no | +| [timeouts\_threshold\_warning](#input\_timeouts\_threshold\_warning) | Warning threshold (count) | `number` | `25` | no | +| [timeouts\_use\_message](#input\_timeouts\_use\_message) | Whether to use the query alert base message for timeouts monitor | 
`bool` | `false` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | diff --git a/aws/lambda/main.tf b/aws/lambda/main.tf index 8e11de1..e37a8f4 100644 --- a/aws/lambda/main.tf +++ b/aws/lambda/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" cold_start_query_filter = local.query_filter == "{*}" ? "{cold_start:true}" : replace(local.query_filter, "{", "{cold_star:true,") @@ -13,9 +13,9 @@ locals { resource "datadog_monitor" "error_rate" { count = var.error_rate_enabled ? 1 : 0 - name = join("", [local.title_prefix, "Lambda error rate - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "Lambda error rate - {{functionname.name}} - {{value}}%", local.title_suffix]) + include_tags = false + message = var.error_rate_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -29,8 +29,8 @@ resource "datadog_monitor" "error_rate" { query = <
"resource:lambda"
]
[| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"resource:alb"
]
[| no | +| [connection\_count\_anomaly\_deviations](#input\_connection\_count\_anomaly\_deviations) | Standard deviations | `number` | `3` | no | +| [connection\_count\_anomaly\_enabled](#input\_connection\_count\_anomaly\_enabled) | Enable connection count anomaly monitor | `bool` | `true` | no | +| [connection\_count\_anomaly\_evaluation\_window](#input\_connection\_count\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_4h"` | no | +| [connection\_count\_anomaly\_no\_data\_window](#input\_connection\_count\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [connection\_count\_anomaly\_recovery\_window](#input\_connection\_count\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_15m"` | no | +| [connection\_count\_anomaly\_rollup](#input\_connection\_count\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonality) | `number` | `60` | no | +| [connection\_count\_anomaly\_seasonality](#input\_connection\_count\_anomaly\_seasonality) | Seasonality (hourly, daily, weekly) | `string` | `"weekly"` | no | +| [connection\_count\_anomaly\_threshold\_critical](#input\_connection\_count\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `0.75` | no | +| [connection\_count\_anomaly\_threshold\_warning](#input\_connection\_count\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | +| [connection\_count\_anomaly\_trigger\_window](#input\_connection\_count\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_1h"` | no | +| [connection\_count\_anomaly\_use\_message](#input\_connection\_count\_anomaly\_use\_message) | 
Whether to use the query alert base message for connection count anomaly monitor | `bool` | `true` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [cpu\_utilization\_anomaly\_deviations](#input\_cpu\_utilization\_anomaly\_deviations) | Standard deviations | `number` | `4` | no | +| [cpu\_utilization\_anomaly\_enabled](#input\_cpu\_utilization\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_anomaly\_evaluation\_window](#input\_cpu\_utilization\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_no\_data\_window](#input\_cpu\_utilization\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cpu\_utilization\_anomaly\_recovery\_window](#input\_cpu\_utilization\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_15m"` | no | +| [cpu\_utilization\_anomaly\_rollup](#input\_cpu\_utilization\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonality) | `number` | `60` | no | +| [cpu\_utilization\_anomaly\_seasonality](#input\_cpu\_utilization\_anomaly\_seasonality) | Seasonality (hourly, daily, weekly) | `string` | `"weekly"` | no | +| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | +| [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | +| [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly 
monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `true` | no | +| [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | +| [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| 
[http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) 
| No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | 
`list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with 
specified value in parenthesis | `string` | `null` | no | +| [used\_storage\_enabled](#input\_used\_storage\_enabled) | Enable used storage monitor | `bool` | `true` | no | +| [used\_storage\_evaluation\_window](#input\_used\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| [used\_storage\_no\_data\_window](#input\_used\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [used\_storage\_threshold\_critical](#input\_used\_storage\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `90` | no | +| [used\_storage\_threshold\_warning](#input\_used\_storage\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `80` | no | +| [used\_storage\_use\_message](#input\_used\_storage\_use\_message) | Whether to use the query alert base message for used storage monitor | `bool` | `true` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | ## Outputs diff --git a/aws/rds/main.tf b/aws/rds/main.tf index bae6ab5..1d4f125 100644 --- a/aws/rds/main.tf +++ b/aws/rds/main.tf @@ -1,19 +1,25 @@ locals { # these must be defined but do not need to be overridden - monitor_alert_default_priority = null - monitor_warn_default_priority = null + # tflint-ignore: terraform_unused_declarations + monitor_alert_default_priority = null + # tflint-ignore: terraform_unused_declarations + monitor_warn_default_priority = null + # tflint-ignore: terraform_unused_declarations monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? 
"" : " (${var.title_suffix})" + + # Add RDS-specific query filter to exclude + rds_query_filter = "{!engine:aurora*,${trimprefix(local.query_filter, "{")}" } resource "datadog_monitor" "connection_count_anomaly" { count = var.connection_count_anomaly_enabled ? 1 : 0 - name = join("", [local.title_prefix, "RDS connection count anomalous activity - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "RDS connection count anomalous activity - {{dbinstanceidentifier.name}}", local.title_suffix]) + include_tags = false + message = var.connection_count_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +33,7 @@ resource "datadog_monitor" "connection_count_anomaly" { query = <
"resource:rds"
]
[| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"resource:alb"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| 
[http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod 
alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [oldest\_message\_enabled](#input\_oldest\_message\_enabled) | Enable oldest queued message monitor | `bool` | `false` | no | +| [oldest\_message\_evaluation\_window](#input\_oldest\_message\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [oldest\_message\_no\_data\_window](#input\_oldest\_message\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [oldest\_message\_threshold\_critical](#input\_oldest\_message\_threshold\_critical) | Critical threshold (seconds) | `number` | `75` | no | +| [oldest\_message\_threshold\_warning](#input\_oldest\_message\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [oldest\_message\_use\_message](#input\_oldest\_message\_use\_message) | Whether to use the query alert base message for oldest message monitor | `bool` | `false` | no | +| [queue\_depth\_enabled](#input\_queue\_depth\_enabled) | Enable queue depth count monitor | `bool` | `false` | no | +| [queue\_depth\_evaluation\_window](#input\_queue\_depth\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [queue\_depth\_no\_data\_window](#input\_queue\_depth\_no\_data\_window) | No data threshold (in minutes, 0 
to disable) | `number` | `10` | no | +| [queue\_depth\_threshold\_critical](#input\_queue\_depth\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [queue\_depth\_threshold\_warning](#input\_queue\_depth\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [queue\_depth\_use\_message](#input\_queue\_depth\_use\_message) | Whether to use the query alert base message for queue depth monitor | `bool` | `false` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/sqs/main.tf b/aws/sqs/main.tf index c0bb6bb..6c98447 100644 --- a/aws/sqs/main.tf +++ b/aws/sqs/main.tf @@ -4,16 +4,16 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}[${var.env}] " + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } resource "datadog_monitor" "oldest_message" { count = var.oldest_message_enabled ? 1 : 0 - name = join("", [local.title_prefix, "Oldest queued message - {{host.name}}", local.title_suffix]) - include_tags = true - message = local.query_alert_base_message + name = join("", [local.title_prefix, "Oldest queued message - {{queuename.name}}", local.title_suffix]) + include_tags = false + message = var.oldest_message_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "oldest_message" { query = <
"resource:queue"
]
[| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
"resource:alb"
]
[| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| 
[http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod 
alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [tunnel\_state\_enabled](#input\_tunnel\_state\_enabled) | Enable VPN tunnel state monitor | `bool` | `false` | no | +| [tunnel\_state\_evaluation\_window](#input\_tunnel\_state\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [tunnel\_state\_no\_data\_window](#input\_tunnel\_state\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [warn\_priority](#input\_warn\_priority) | Priority for 
alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | ## Outputs diff --git a/aws/vpn/main.tf b/aws/vpn/main.tf index 304e91b..bd4df6a 100644 --- a/aws/vpn/main.tf +++ b/aws/vpn/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "tunnel_state" { count = var.tunnel_state_enabled ? 1 : 0 name = join("", [local.title_prefix, "VPN tunnel state - {{host.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "tunnel_state" { query = <
"resource:vpn"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [host\_unreachable\_enabled](#input\_host\_unreachable\_enabled) | Flag to enable Host unreachable monitor | `bool` | `true` | no | +| [host\_unreachable\_use\_message](#input\_host\_unreachable\_use\_message) | Flag to enable Host unreachable alerting | `bool` | `true` | no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| 
[notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. 
+ diff --git a/host/agent/common.tf b/host/agent/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/agent/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/agent/main.tf b/host/agent/main.tf new file mode 100644 index 0000000..1d28ecc --- /dev/null +++ b/host/agent/main.tf @@ -0,0 +1,37 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "host_unreachable" { + count = var.host_unreachable_enabled ? 1 : 0 + + name = join("", [local.title_prefix, "Datadog Agent Status - {{name.name}}", local.title_suffix]) + include_tags = false + message = var.host_unreachable_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "service check" + + evaluation_delay = var.evaluation_delay + new_group_delay = var.new_group_delay + no_data_timeframe = "5" + notify_no_data = true + renotify_interval = var.renotify_interval + require_full_window = true + timeout_h = var.timeout_h + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [system\_clock\_enabled](#input\_system\_clock\_enabled) | Flag to enable System Clock monitor | `bool` | `true` | no | +| [system\_clock\_use\_message](#input\_system\_clock\_use\_message) | Flag to enable System Clock alerting | `bool` | `false` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/host/clock/common.tf b/host/clock/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/clock/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/clock/main.tf b/host/clock/main.tf new file mode 100644 index 0000000..be6e892 --- /dev/null +++ b/host/clock/main.tf @@ -0,0 +1,36 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? 
"" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "system_clock" { + count = var.system_clock_enabled ? 1 : 0 + + name = join("", [local.title_prefix, "System Clock - {{name.name}}", local.title_suffix]) + include_tags = false + message = var.system_clock_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "service check" + + evaluation_delay = var.evaluation_delay + new_group_delay = var.new_group_delay + notify_no_data = false + renotify_interval = var.renotify_interval + require_full_window = true + timeout_h = var.timeout_h + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Flag to enable CPU Utilization monitor | `bool` | `true` | no | +| [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | +| [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_time\_aggregator](#input\_cpu\_utilization\_time\_aggregator) | Monitor aggregator for CPU high [available values: min, max or avg] | `string` | `"min"` | no | +| [cpu\_utilization\_timeframe](#input\_cpu\_utilization\_timeframe) | Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Flag to enable CPU Utilization alerting | `bool` | `false` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [Datadog Docs](https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/host/cpu/common.tf b/host/cpu/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/cpu/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/cpu/main.tf b/host/cpu/main.tf new file mode 100644 index 0000000..faa5e9a --- /dev/null +++ b/host/cpu/main.tf @@ -0,0 +1,39 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "cpu_utilization" { + count = var.cpu_utilization_enabled ? 
1 : 0 + + name = join("", [local.title_prefix, "CPU Utilization - {{name.name}}", local.title_suffix]) + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "query alert" + + evaluation_delay = var.evaluation_delay + new_group_delay = var.new_group_delay + notify_no_data = false + no_data_timeframe = var.cpu_utilization_no_data_window + renotify_interval = var.renotify_interval + require_full_window = true + timeout_h = var.timeout_h + include_tags = false + + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [disk\_inodes\_enabled](#input\_disk\_inodes\_enabled) | Flag to enable Free disk inodes monitor | `string` | `"true"` | no | +| [disk\_inodes\_threshold\_critical](#input\_disk\_inodes\_threshold\_critical) | Free disk inodes critical threshold | `number` | `95` | no | +| [disk\_inodes\_threshold\_warning](#input\_disk\_inodes\_threshold\_warning) | Free disk inodes warning threshold | `number` | `90` | no | +| [disk\_inodes\_time\_aggregator](#input\_disk\_inodes\_time\_aggregator) | Monitor aggregator for Free disk inodes [available values: min, max or avg] | `string` | `"min"` | no | +| [disk\_inodes\_timeframe](#input\_disk\_inodes\_timeframe) | Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [disk\_inodes\_use\_message](#input\_disk\_inodes\_use\_message) | Flag to enable Free disk inodes alerting | `string` | `"true"` | no | +| [disk\_space\_enabled](#input\_disk\_space\_enabled) | Flag to enable Free diskspace monitor | `string` | `"true"` | no | +| [disk\_space\_forecast\_algorithm](#input\_disk\_space\_forecast\_algorithm) | Algorithm for the Free diskspace Forecast monitor [available values: `linear` or `seasonal`] | `string` | `"linear"` | no | +| [disk\_space\_forecast\_deviations](#input\_disk\_space\_forecast\_deviations) | Deviations for the Free diskspace Forecast monitor [available values: `1`, `2`, `3`, `4` or `5`] | `string` | `1` | no | +| [disk\_space\_forecast\_enabled](#input\_disk\_space\_forecast\_enabled) | Flag to enable Free diskspace forecast monitor | `string` | `"true"` | no | +| [disk\_space\_forecast\_interval](#input\_disk\_space\_forecast\_interval) | Interval for 
the Free diskspace Forecast monitor [available values: `30m`, `60m` or `120m`] | `string` | `"60m"` | no | +| [disk\_space\_forecast\_linear\_history](#input\_disk\_space\_forecast\_linear\_history) | History for the Free diskspace Forecast monitor [available values: `12h`, `#d` (1, 2, or 3), `#w` (1, or 2) or `#mo` (1, 2 or 3)] | `string` | `"1w"` | no | +| [disk\_space\_forecast\_linear\_model](#input\_disk\_space\_forecast\_linear\_model) | Model for the Free diskspace Forecast monitor [available values: `default`, `simple` or `reactive`] | `string` | `"default"` | no | +| [disk\_space\_forecast\_seasonal\_seasonality](#input\_disk\_space\_forecast\_seasonal\_seasonality) | Seasonality for the Free diskspace Forecast monitor | `string` | `"weekly"` | no | +| [disk\_space\_forecast\_threshold\_critical](#input\_disk\_space\_forecast\_threshold\_critical) | Free disk space forecast critical threshold | `number` | `80` | no | +| [disk\_space\_forecast\_threshold\_critical\_recovery](#input\_disk\_space\_forecast\_threshold\_critical\_recovery) | Free disk space forecast recovery threshold | `number` | `72` | no | +| [disk\_space\_forecast\_time\_aggregator](#input\_disk\_space\_forecast\_time\_aggregator) | Monitor aggregator for Free diskspace forecast [available values: min, max or avg] | `string` | `"max"` | no | +| [disk\_space\_forecast\_timeframe](#input\_disk\_space\_forecast\_timeframe) | Monitor timeframe for Free diskspace forecast [available values: `next_12h`, `next_#d` (1, 2, or 3), `next_#w` (1 or 2) or `next_#mo` (1, 2 or 3)] | `string` | `"next_1w"` | no | +| [disk\_space\_forecast\_use\_message](#input\_disk\_space\_forecast\_use\_message) | Flag to enable Free diskspace forecast alerting | `string` | `"false"` | no | +| [disk\_space\_threshold\_critical](#input\_disk\_space\_threshold\_critical) | Free disk space critical threshold | `number` | `90` | no | +| [disk\_space\_threshold\_warning](#input\_disk\_space\_threshold\_warning) | Free disk 
space warning threshold | `number` | `80` | no | +| [disk\_space\_time\_aggregator](#input\_disk\_space\_time\_aggregator) | Monitor aggregator for Free diskspace [available values: min, max or avg] | `string` | `"max"` | no | +| [disk\_space\_timeframe](#input\_disk\_space\_timeframe) | Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [disk\_space\_use\_message](#input\_disk\_space\_use\_message) | Flag to enable Free diskspace alerting | `string` | `"true"` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/host/disk/common.tf b/host/disk/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/disk/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/disk/main.tf b/host/disk/main.tf new file mode 100644 index 0000000..ce57f41 --- /dev/null +++ b/host/disk/main.tf @@ -0,0 +1,100 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "disk_space" { + count = var.disk_space_enabled ? 
1 : 0 + + name = join("", [local.title_prefix, "Disk Space - {{name.name}}", local.title_suffix]) + message = var.disk_space_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "query alert" + + evaluation_delay = var.evaluation_delay + new_group_delay = var.new_group_delay + notify_no_data = false + notify_audit = false + timeout_h = var.timeout_h + include_tags = true + require_full_window = true + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [memory\_enabled](#input\_memory\_enabled) | Flag to enable Free memory monitor | `string` | `"true"` | no | +| [memory\_threshold\_critical](#input\_memory\_threshold\_critical) | Free memory critical threshold | `number` | `5` | no | +| [memory\_threshold\_warning](#input\_memory\_threshold\_warning) | Free memory warning threshold | `number` | `10` | no | +| [memory\_time\_aggregator](#input\_memory\_time\_aggregator) | Monitor aggregator for Free memory [available values: min, max or avg] | `string` | `"max"` | no | +| [memory\_timeframe](#input\_memory\_timeframe) | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [memory\_use\_message](#input\_memory\_use\_message) | Flag to enable Free memory alerting | `string` | `"true"` | no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| 
[service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/host/memory/common.tf b/host/memory/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/memory/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/memory/main.tf b/host/memory/main.tf new file mode 100644 index 0000000..4bc8ffb --- /dev/null +++ b/host/memory/main.tf @@ -0,0 +1,39 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "memory" { + count = var.memory_enabled ? 1 : 0 + + name = join("", [local.title_prefix, "Usable Memory - {{name.name}}", local.title_suffix]) + include_tags = false + message = var.memory_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "query alert" + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [process\_alert\_enabled](#input\_process\_alert\_enabled) | Flag to enable Process Check monitor | `string` | `"true"` | no | +| [process\_alert\_operator](#input\_process\_alert\_operator) | Operator for Process Alert Query [available values: `<, >, <=, >=, =`] | `string` | `"<"` | no | +| [process\_alert\_process\_name](#input\_process\_alert\_process\_name) | Name of Process for Process Alert | `string` | `""` | no | +| [process\_alert\_threshold\_critical](#input\_process\_alert\_threshold\_critical) | Process Alert critical threshold | `number` | `1` | no | +| [process\_alert\_threshold\_warning](#input\_process\_alert\_threshold\_warning) | Process Alert warning threshold | `number` | `null` | no | +| [process\_alert\_timeframe](#input\_process\_alert\_timeframe) | Monitor timeframe for Process Alert [available values: `#m` (1, 5, 10, 15, or 30), `#h` (1, 2, or 4), or `1d`] | `string` | `"5m"` | no | +| [process\_alert\_use\_message](#input\_process\_alert\_use\_message) | Flag to enable Process Check alerting | `string` | `"true"` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| 
[warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/host/process/common.tf b/host/process/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/process/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/process/main.tf b/host/process/main.tf new file mode 100644 index 0000000..5acbf0a --- /dev/null +++ b/host/process/main.tf @@ -0,0 +1,35 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "process_alert" { + count = var.process_alert_enabled ? 1 : 0 + + name = join("", [local.title_prefix, "Process Alert - {{host.name}}", local.title_suffix]) + message = var.process_alert_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "process alert" + + evaluation_delay = var.evaluation_delay + notify_no_data = false + renotify_interval = 0 + notify_audit = false + timeout_h = var.timeout_h + include_tags = false + require_full_window = true + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | +| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [group\_by](#input\_group\_by) | List of tags to group by | `list(string)` |
"resource:ec2"
]
[| no | +| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | +| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | +| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | +| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | +| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | 
`list(string)` | `[]` | no | +| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `60` | no | +| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | +| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [swap\_enabled](#input\_swap\_enabled) | Flag to enable Swap monitor | `string` | `"true"` | no | +| [swap\_threshold\_critical](#input\_swap\_threshold\_critical) | Free Swap critical threshold as percentage | `number` | `0.1` | no | +| [swap\_threshold\_warning](#input\_swap\_threshold\_warning) | Free Swap warning threshold as percentage | `number` | `0.3` | no | +| [swap\_time\_aggregator](#input\_swap\_time\_aggregator) | Monitor aggregator for Free Swap [available values: min, max or avg] | `string` | `"max"` | no | +| [swap\_timeframe](#input\_swap\_timeframe) | Monitor timeframe for Free Swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [swap\_use\_message](#input\_swap\_use\_message) | Flag to enable Swap alerting | `string` | `"false"` | no | +| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | +| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | + +## Outputs + +No outputs. 
+ diff --git a/host/swap/common.tf b/host/swap/common.tf new file mode 120000 index 0000000..47c0063 --- /dev/null +++ b/host/swap/common.tf @@ -0,0 +1 @@ +../../common/common.tf \ No newline at end of file diff --git a/host/swap/main.tf b/host/swap/main.tf new file mode 100644 index 0000000..0e097e7 --- /dev/null +++ b/host/swap/main.tf @@ -0,0 +1,38 @@ +locals { + # these must be defined but do not need to be overridden + monitor_alert_default_priority = null + monitor_warn_default_priority = null + monitor_nodata_default_priority = null + + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" + title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" +} + +resource "datadog_monitor" "swap" { + count = var.swap_enabled ? 1 : 0 + + name = join("", [local.title_prefix, "Usable Swap - {{name.name}}", local.title_suffix]) + message = var.swap_use_message ? local.query_alert_base_message : "" + tags = concat(local.common_tags, var.base_tags, var.additional_tags) + type = "query alert" + + query = <
"name",
"aws_account",
"env",
"datadog_managed"
]
[| no | +| [browser\_synthetic\_device\_ids](#input\_browser\_synthetic\_device\_ids) | List with the different device IDs used to run the test. Valid values are laptop\_large, tablet, mobile\_small, chrome.laptop\_large, chrome.tablet, chrome.mobile\_small, firefox.laptop\_large, firefox.tablet, firefox.mobile\_small, edge.laptop\_large, edge.tablet, edge.mobile\_small. | `list(string)` |
"resource:apigateway"
]
[| no | +| [browser\_synthetic\_enabled](#input\_browser\_synthetic\_enabled) | Flag to enable Browser Synthetic Test. | `bool` | `true` | no | +| [browser\_synthetic\_locations](#input\_browser\_synthetic\_locations) | An array of datadog locations used to run Browser Synthetic Test. | `list(string)` |
"laptop_large"
]
[| no | +| [browser\_synthetic\_request\_url](#input\_browser\_synthetic\_request\_url) | URL to send Browser Synthetic Test requests to. | `string` | n/a | yes | +| [browser\_synthetic\_steps](#input\_browser\_synthetic\_steps) | Steps for the Browser Synthetic Test to take. |
"aws:us-east-1"
]
list(object({
name = string
type = string
params = object({
attribute = optional(string)
check = optional(string)
click_type = optional(string)
code = optional(string)
delay = optional(number)
element = optional(string)
element_user_locator = optional(map(string))
email = optional(string)
file = optional(string)
files = optional(string)
modifiers = optional(list(string))
playing_tab_id = optional(string)
request = optional(string)
subtest_public_id = optional(string)
value = optional(string)
variable = optional(map(string))
with_click = optional(bool)
x = optional(number)
y = optional(number)
})
})) | n/a | yes |
+| [browser\_synthetic\_tick\_every](#input\_browser\_synthetic\_tick\_every) | How often Browser Synthetic Test should run in seconds. | `number` | `900` | no |
+| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no |
+| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes |
+| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [Datadog Docs](https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions)) | `number` | `900` | no |
+| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no |
+| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no |
+| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no |
+| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes |
+| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no |
+| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no |
+| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no |
+| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no |
+| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no |
+| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parentheses | `string` | `null` | no |
+| [warn\_priority](#input\_warn\_priority) | Priority for alerts in warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+
+## Outputs
+
+No outputs.
+
diff --git a/synthetics/browser/common.tf b/synthetics/browser/common.tf
new file mode 120000
index 0000000..47c0063
--- /dev/null
+++ b/synthetics/browser/common.tf
@@ -0,0 +1 @@
+../../common/common.tf
\ No newline at end of file
diff --git a/synthetics/browser/main.tf b/synthetics/browser/main.tf
new file mode 100644
index 0000000..dc54e85
--- /dev/null
+++ b/synthetics/browser/main.tf
@@ -0,0 +1,74 @@
+locals {
+  # Alert titles: optional "[prefix]" followed by the mandatory "[env] " tag, and an optional " (suffix)".
+  title_prefix = join("", [var.title_prefix == null ? "" : "[${var.title_prefix}]", "[${var.env}] "])
+  title_suffix = var.title_suffix != null ? " (${var.title_suffix})" : ""
+
+  # Must be defined (presumably read by the shared common.tf - confirm); no override is needed here, hence null.
+  monitor_alert_default_priority  = null
+  monitor_warn_default_priority   = null
+  monitor_nodata_default_priority = null
+}
+
+# Datadog browser synthetic test; `count` gates creation on var.browser_synthetic_enabled.
+resource "datadog_synthetics_test" "browser" {
+  count = var.browser_synthetic_enabled ? 1 : 0
+
+  name       = join("", [local.title_prefix, "Browser Test", local.title_suffix])
+  type       = "browser"
+  status     = "live"
+  message    = local.synthetic_alert_base_message
+  device_ids = var.browser_synthetic_device_ids
+  locations  = var.browser_synthetic_locations
+  tags       = concat(local.common_tags, var.base_tags, var.additional_tags)
+
+  request_definition {
+    method = "GET"
+    url    = var.browser_synthetic_request_url
+  }
+
+  dynamic "browser_step" {
+    for_each = var.browser_synthetic_steps
+    content {
+      name = browser_step.value.name
+      type = browser_step.value.type
+      params {
+        attribute  = browser_step.value.params.attribute
+        check      = browser_step.value.params.check
+        click_type = browser_step.value.params.click_type
+        code       = browser_step.value.params.code
+        delay      = browser_step.value.params.delay
+        element    = browser_step.value.params.element
+        # Wrap the map in a one-element list: iterating the map itself yields its string values, which lookup() rejects.
+        dynamic "element_user_locator" {
+          for_each = browser_step.value.params.element_user_locator != null ? [browser_step.value.params.element_user_locator] : []
+          content {
+            value {
+              value = lookup(element_user_locator.value, "value", null)
+              type  = lookup(element_user_locator.value, "type", null)
+            }
+            fail_test_on_cannot_locate = lookup(element_user_locator.value, "fail_test_on_cannot_locate", null)
+          }
+        }
+        email             = browser_step.value.params.email
+        file              = browser_step.value.params.file
+        files             = browser_step.value.params.files
+        modifiers         = browser_step.value.params.modifiers
+        playing_tab_id    = browser_step.value.params.playing_tab_id
+        request           = browser_step.value.params.request
+        subtest_public_id = browser_step.value.params.subtest_public_id
+        value             = browser_step.value.params.value
+        dynamic "variable" {
+          for_each = browser_step.value.params.variable != null ? [browser_step.value.params.variable] : [] # one block per map, not per key
+          content {
+            example = lookup(variable.value, "example", null)
+            name    = lookup(variable.value, "name", null)
+          }
+        }
+        with_click = browser_step.value.params.with_click
+        x          = browser_step.value.params.x
+        y          = browser_step.value.params.y
+      }
+    }
+  }
+
+  options_list { tick_every = var.browser_synthetic_tick_every }
+}
diff --git a/synthetics/browser/variables.tf b/synthetics/browser/variables.tf
new file mode 100644
index 0000000..8353930
--- /dev/null
+++ b/synthetics/browser/variables.tf
@@ -0,0 +1,75 @@
+########################################
+# Global variables
+########################################
+variable "additional_tags" {
+  type        = list(string)
+  description = "Additional tags (key:value format) to add to this type of check (combined with `local.tags` and `var.base_tags`)"
+  default     = [] # nothing extra is appended unless the caller sets this
+}
+
+variable "base_tags" {
+  type        = list(string)
+  description = "Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this)"
+  default     = [] # this module adds no base tags of its own
+}
+
+########################################
+# Browser Synthetic Test variables
+########################################
+variable "browser_synthetic_enabled" {
+  type        = bool
+  default     = true # the test is created unless explicitly switched off
+  description = "Flag to enable Browser Synthetic Test."
+}
+
+variable "browser_synthetic_device_ids" {
+  type        = list(string)
+  default     = ["laptop_large"] # single device profile by default
+  description = "List with the different device IDs used to run the test. Valid values are laptop_large, tablet, mobile_small, chrome.laptop_large, chrome.tablet, chrome.mobile_small, firefox.laptop_large, firefox.tablet, firefox.mobile_small, edge.laptop_large, edge.tablet, edge.mobile_small."
+}
+
+variable "browser_synthetic_locations" {
+  type        = list(string)
+  default     = ["aws:us-east-1"] # single location by default
+  description = "An array of datadog locations used to run Browser Synthetic Test."
+}
+
+variable "browser_synthetic_request_url" {
+  type        = string # no default: callers must supply the URL
+  description = "URL to send Browser Synthetic Test requests to."
+}
+
+variable "browser_synthetic_steps" {
+  type = list(object({
+    name = string
+    type = string
+    params = object({
+      attribute            = optional(string)
+      check                = optional(string)
+      click_type           = optional(string)
+      code                 = optional(string)
+      delay                = optional(number)
+      element              = optional(string)
+      element_user_locator = optional(map(string)) # keys read in main.tf: value, type, fail_test_on_cannot_locate
+      email                = optional(string)
+      file                 = optional(string)
+      files                = optional(string)
+      modifiers            = optional(list(string))
+      playing_tab_id       = optional(string)
+      request              = optional(string)
+      subtest_public_id    = optional(string)
+      value                = optional(string)
+      variable             = optional(map(string)) # keys read in main.tf: example, name
+      with_click           = optional(bool)
+      x                    = optional(number)
+      y                    = optional(number)
+    })
+  }))
+  description = "Steps for the Browser Synthetic Test to take."
+}
+
+variable "browser_synthetic_tick_every" {
+  type        = number
+  default     = 900 # seconds, per the description below
+  description = "How often Browser Synthetic Test should run in seconds."
+}
diff --git a/synthetics/browser/versions.tf b/synthetics/browser/versions.tf
new file mode 120000
index 0000000..cbeda73
--- /dev/null
+++ b/synthetics/browser/versions.tf
@@ -0,0 +1 @@
+../../common/versions.tf
\ No newline at end of file
diff --git a/synthetics/ssl/.terraform.lock.hcl b/synthetics/ssl/.terraform.lock.hcl
new file mode 100644
index 0000000..c3f1a74
--- /dev/null
+++ b/synthetics/ssl/.terraform.lock.hcl
@@ -0,0 +1,46 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/datadog/datadog" {
+ version = "3.42.0"
+ constraints = ">= 3.37.0"
+ hashes = [
+ "h1:Wi04Hu0HzKTJilkBg7oLoZJyuWLfIXfvGYX7Vdi7VnI=",
+ "h1:tXm3tAaqCBejFSOBzaPersNH9VrSl3XxJ2+5uW6jcq4=",
+ "zh:168651f7fb057da1747056f600971b20741ee8dae9a10b8d84862dec032a0609",
+ "zh:1d4dd8dc0a367721ebcf6418d31612f2c1cca253ef1183e9247deecc14a5e4dd",
+ "zh:262cde1f060ad33612f709f9d73d744db3eb11bedc7c2326692aa5aa82d675f4",
+ "zh:27aa69e2c707136198814252c5e8d9660cf560ba614253cf8c51ca086ac2c9aa",
+ "zh:437b97e85b181c70b29cd0ceaee470bb654cec38efe459ae8fac58c75e082e81",
+ "zh:5011d1d68226c50f996f72dfca04029c2ef33e0e7ff79ddf54941dc704bf5d74",
+ "zh:65e048c49aa4739fbdc62096c58a8ee33f3ed2f2ff169d3e042bc17374f36484",
+ "zh:8b4fb3221839228a30b83d2591925952c86206dcac4d517a7822384ed7ff29ba",
+ "zh:9389e9224d815f8b684742d45383e981b26a3c87ce3828101db153d901a10d87",
+ "zh:97f9dcfc33edd824a65ee7debcf4b437b146c4a18fce5e3b98a3f0e9aab97b1e",
+ "zh:a1c39f73301335d99a9ed154d5b32b9208540c33ba6c6eb07574eba828ae6814",
+ "zh:c10413ba2e1985cb499221e83f0b848a547a3648e1d9e75c546ad8c50f12478b",
+ "zh:e9bb68a4beaafa4630cd7ddf7206d1cd799497dc6cac2c9de5c419e40f81c1bf",
+ "zh:eefac5e52e0fe756eeac8c336f9fd64adb8d86155695b4e7a11a4874f1e3bc85",
+ ]
+}
+
+provider "registry.terraform.io/hashicorp/null" {
+ version = "3.2.2"
+ constraints = ">= 3.1.0"
+ hashes = [
+ "h1:IMVAUHKoydFrlPrl9OzasDnw/8ntZFerCC9iXw1rXQY=",
+ "h1:vWAsYRd7MjYr3adj8BVKRohVfHpWQdvkIwUQ2Jf5FVM=",
+ "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7",
+ "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a",
+ "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3",
+ "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606",
+ "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546",
+ "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539",
+ "zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452",
+ "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
+ "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422",
+ "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae",
+ "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1",
+ "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e",
+ ]
+}
diff --git a/synthetics/ssl/README.md b/synthetics/ssl/README.md
new file mode 100644
index 0000000..3928138
--- /dev/null
+++ b/synthetics/ssl/README.md
@@ -0,0 +1,73 @@
+# terraform-datadog-monitor/synthetics/ssl
+
+Configures Synthetic Test for SSL Certificate
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | ~> 1.5 |
+| [datadog](#requirement\_datadog) | >= 3.37 |
+| [null](#requirement\_null) | >= 3.1.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [datadog](#provider\_datadog) | 3.42.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [datadog_synthetics_test.ssl](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/synthetics_test) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [additional\_tags](#input\_additional\_tags) | Additional tags (key:value format) to add to this type of check (combined with `local.tags` and `var.base_tags`) | `list(string)` | `[]` | no |
+| [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+| [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no |
+| [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` | `[]` | no |
+| [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no |
+| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes |
+| [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [Datadog Docs](https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions)) | `number` | `900` | no |
+| [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no |
+| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no |
+| [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no |
+| [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes |
+| [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no |
+| [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no |
+| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no |
+| [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no |
+| [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [ssl\_synthetic\_accept\_self\_signed](#input\_ssl\_synthetic\_accept\_self\_signed) | Whether or not SSL Synthetic Test should allow self signed certificates. | `bool` | `false` | no |
+| [ssl\_synthetic\_days\_to\_expiration](#input\_ssl\_synthetic\_days\_to\_expiration) | Number of Days till certificate expiration for SSL Synthetic Test to alert. | `number` | `30` | no |
+| [ssl\_synthetic\_enabled](#input\_ssl\_synthetic\_enabled) | Flag to enable SSL Synthetic Test | `bool` | `true` | no |
+| [ssl\_synthetic\_host](#input\_ssl\_synthetic\_host) | Host name to perform SSL Synthetic Test with. | `string` | n/a | yes |
+| [ssl\_synthetic\_locations](#input\_ssl\_synthetic\_locations) | An array of datadog locations used to run SSL Synthetic Test | `list(string)` | <pre>[<br>  "aws:us-east-1"<br>]</pre> | no |
+| [ssl\_synthetic\_max\_response\_time](#input\_ssl\_synthetic\_max\_response\_time) | Number of milliseconds host response time should be less than. | `number` | `2000` | no |
+| [ssl\_synthetic\_min\_tls\_version](#input\_ssl\_synthetic\_min\_tls\_version) | Number Certificate TLS version should be equal to or greater than. | `number` | `1.2` | no |
+| [ssl\_synthetic\_port](#input\_ssl\_synthetic\_port) | Port to use when performing SSL Synthetic Test. | `number` | `443` | no |
+| [ssl\_synthetic\_tick\_every](#input\_ssl\_synthetic\_tick\_every) | How often SSL Synthetic Test should run in seconds. | `number` | `86400` | no |
+| [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no |
+| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no |
+| [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no |
+| [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no |
+| [warn\_priority](#input\_warn\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no |
+
+## Outputs
+
+No outputs.
+
diff --git a/synthetics/ssl/common.tf b/synthetics/ssl/common.tf
new file mode 120000
index 0000000..47c0063
--- /dev/null
+++ b/synthetics/ssl/common.tf
@@ -0,0 +1 @@
+../../common/common.tf
\ No newline at end of file
diff --git a/synthetics/ssl/main.tf b/synthetics/ssl/main.tf
new file mode 100644
index 0000000..b165446
--- /dev/null
+++ b/synthetics/ssl/main.tf
@@ -0,0 +1,79 @@
+# Module-local values: priority placeholders and the test name decorations.
+locals {
+  # these must be defined but do not need to be overridden
+  monitor_alert_default_priority  = null
+  monitor_warn_default_priority   = null
+  monitor_nodata_default_priority = null
+
+  # Name decorations; each collapses to "" when its variable is null.
+  title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]"
+  title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})"
+}
+
+# Datadog API test (subtype "ssl") against var.ssl_synthetic_host.
+# Created only when var.ssl_synthetic_enabled is true.
+resource "datadog_synthetics_test" "ssl" {
+  count = var.ssl_synthetic_enabled ? 1 : 0
+
+  name    = join("", [local.title_prefix, "SSL Certificate expiration - ${var.ssl_synthetic_host}", local.title_suffix])
+  type    = "api"
+  subtype = "ssl"
+  status  = "live"
+  # The expiry window is interpolated by Terraform so the notification always
+  # shows the configured number of days.
+  message = <<-EOT
+    {{#is_alert}}
+    ${local.notify_on_prod}
+    SSL certificate is expiring in less than ${var.ssl_synthetic_days_to_expiration} days.
+    Please renew the certificate to prevent service disruption.
+    {{/is_alert}}
+
+    {{#is_recovery}}
+    ${local.notify_on_prod}
+    SSL certificate has been renewed and is no longer at risk of expiring.
+    {{/is_recovery}}
+  EOT
+  locations = var.ssl_synthetic_locations
+  tags      = concat(local.common_tags, var.base_tags, var.additional_tags)
+
+  request_definition {
+    host = var.ssl_synthetic_host
+    port = var.ssl_synthetic_port
+  }
+
+  # Always present: the certificate must stay valid for more than N days.
+  assertion {
+    type     = "certificate"
+    operator = "isInMoreThan"
+    target   = var.ssl_synthetic_days_to_expiration
+  }
+
+  # Optional TLS version assertion, emitted only when the check is enabled.
+  dynamic "assertion" {
+    for_each = var.ssl_synthetic_tls_check_enabled ? [1] : []
+    content {
+      type     = "tlsVersion"
+      operator = "moreThanOrEqual"
+      target   = var.ssl_synthetic_min_tls_version
+    }
+  }
+
+  # Optional response time assertion, emitted only when the check is enabled.
+  dynamic "assertion" {
+    for_each = var.ssl_synthetic_response_time_check_enabled ? [1] : []
+    content {
+      type     = "responseTime"
+      operator = "lessThan"
+      target   = var.ssl_synthetic_max_response_time
+    }
+  }
+
+  options_list {
+    tick_every         = var.ssl_synthetic_tick_every
+    accept_self_signed = var.ssl_synthetic_accept_self_signed
+
+    monitor_options {
+      renotify_interval = var.ssl_synthetic_renotify_interval
+    }
+  }
+}
diff --git a/synthetics/ssl/variables.tf b/synthetics/ssl/variables.tf
new file mode 100644
index 0000000..16a61c7
--- /dev/null
+++ b/synthetics/ssl/variables.tf
@@ -0,0 +1,90 @@
+########################################
+# Global variables
+########################################
+variable "additional_tags" {
+  default     = []
+  description = "Additional tags (key:value format) to add to this type of check (combined with `local.tags` and `var.base_tags`)"
+  type        = list(string)
+}
+
+variable "base_tags" {
+  default     = []
+  description = "Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this)"
+  type        = list(string)
+}
+
+########################################
+# SSL Synthetic Test variables
+########################################
+# Declared as bool because it drives the resource count condition; callers
+# still passing the strings "true"/"false" are converted automatically.
+variable "ssl_synthetic_enabled" {
+  description = "Flag to enable SSL Synthetic Test"
+  type        = bool
+  default     = true
+}
+
+variable "ssl_synthetic_locations" {
+  description = "An array of datadog locations used to run SSL Synthetic Test"
+  type        = list(string)
+  default     = ["aws:us-east-1"]
+}
+
+variable "ssl_synthetic_host" {
+  description = "Host name to perform SSL Synthetic Test with."
+  type        = string
+}
+
+variable "ssl_synthetic_port" {
+  description = "Port to use when performing SSL Synthetic Test."
+  type        = number
+  default     = 443
+}
+
+variable "ssl_synthetic_days_to_expiration" {
+  description = "Number of Days till certificate expiration for SSL Synthetic Test to alert."
+  type        = number
+  default     = 30
+}
+
+variable "ssl_synthetic_min_tls_version" {
+  description = "Number Certificate TLS version should be equal to or greater than."
+  type        = number
+  default     = 1.2
+}
+
+variable "ssl_synthetic_max_response_time" {
+  description = "Number of milliseconds host response time should be less than."
+  type        = number
+  default     = 2000
+}
+
+variable "ssl_synthetic_renotify_interval" {
+  description = "How often SSL Synthetic Test should renotify in minutes."
+  type        = number
+  default     = 10080
+}
+
+variable "ssl_synthetic_tick_every" {
+  description = "How often SSL Synthetic Test should run in seconds."
+  type        = number
+  default     = 86400
+}
+
+variable "ssl_synthetic_accept_self_signed" {
+  description = "Whether or not SSL Synthetic Test should allow self signed certificates."
+  type        = bool
+  default     = false
+}
+
+variable "ssl_synthetic_tls_check_enabled" {
+  type        = bool
+  description = "Enable TLS version checking in the synthetic test"
+  default     = false
+}
+
+variable "ssl_synthetic_response_time_check_enabled" {
+  type        = bool
+  description = "Enable response time checking in the synthetic test"
+  default     = false
+}
diff --git a/synthetics/ssl/versions.tf b/synthetics/ssl/versions.tf
new file mode 120000
index 0000000..cbeda73
--- /dev/null
+++ b/synthetics/ssl/versions.tf
@@ -0,0 +1 @@
+../../common/versions.tf
\ No newline at end of file