Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Conversation

chrisberkhout
Copy link
Contributor

@chrisberkhout chrisberkhout commented May 24, 2024

Proposed commit message

[SSI integrations] Set `response.split.ignore_empty_value: true` (#)

For integrations that use `response.split` options to split on a list,
don't explicitly set the `ignore_empty_value` option, and don't keep
parent fields, we usually don't want to index a document if the target
list is empty.

However, the default functionality is to index the whole document if
the split target is empty, so this change sets
`ignore_empty_value: true` explicitly.

The [`response.split` documentation][1] says:

> If the split target is empty the parent document will be kept.
> If documents with empty splits should be dropped, the
> `ignore_empty_value` option should be set to `true`.
 
[1]: https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-httpjson.html#response-split

Discussion

The effects of the somewhat counterintuitive default split behavior are sometimes reported by users, as was the case in #9705.

I used the following Python script to identify and update relevant cases.

fix.py
#!/usr/bin/env python3

import os
import fnmatch
import re
import yaml


# define functions to generate and transform items

def yml_hbs_files(root=os.getcwd()):
    files = []
    for dirpath, dirnames, filenames in os.walk(root):
        for filename in fnmatch.filter(filenames, "*.yml.hbs"):
            files.append({ "file": os.path.join(dirpath, filename) })
    return files

def add_split_config(item):
    config = []
    with open(item["file"], 'r') as file:
        is_split_config = False
        for line in file.readlines():
            if line.startswith('response.split:'):
                is_split_config = True
                config = [] # one case has two configs and it's the second one that matters
            elif not line.startswith(' ') and not line.startswith('\t'):
                is_split_config = False
            if is_split_config:
                config.append(line)
    result = item.copy()
    result["split_config"] = config
    return result

def add_not_templated(item):
    templated = False
    for line in item["split_config"]:
        if re.match(r".*{{.*}}.*", line):
            templated = True
    result = item.copy()
    result["not_templated"] = not templated
    return result

def add_yaml(item):
    parsed = None
    if len(item["split_config"]) > 0 and item["not_templated"]:
        parsed = yaml.safe_load("".join(item["split_config"]))
    result = item.copy()
    result["yaml"] = parsed
    return result

def add_is_list_type(item):
    is_list_type = False
    if item["yaml"]:
        is_list_type = item["yaml"]["response.split"].get("type", "list") == "list"
    result = item.copy()
    result["is_list_type"] = is_list_type
    return result

def add_no_top_level_ignore_empty_value(item):
    absent = True
    if item["yaml"]:
        absent = not "ignore_empty_value" in item["yaml"]["response.split"]
    result = item.copy()
    result["no_top_level_ignore_empty_value"] = absent
    return result

def add_false_top_level_keep_parent(item):
    is_set = False
    if item["yaml"]:
        is_set = item["yaml"]["response.split"].get("keep_parent", False)
    result = item.copy()
    result["false_top_level_keep_parent"] = not is_set
    return result

def add_new_config_if_suitable(item):
    suitable = \
        len(item["split_config"]) > 0 and \
        item["not_templated"] and \
        item["is_list_type"] and \
        item["no_top_level_ignore_empty_value"] and \
        item["false_top_level_keep_parent"]
    if suitable:
        new_yaml = item["yaml"].copy()
        new_yaml["response.split"] = {}
        for k, v in item["yaml"]["response.split"].items():
            new_yaml["response.split"][k] = v
            if k == "target": # insert after top-level target param
                new_yaml["response.split"]["ignore_empty_value"] = True
        new_config = yaml.dump(new_yaml, sort_keys=False)
        result = item.copy()
        result["new_config"] = new_config
        return result
    else:
        return item


# generate and transform items
items = yml_hbs_files()
items = map(add_split_config, items)
items = map(add_not_templated, items)
items = map(add_yaml, items)
items = map(add_is_list_type, items)
items = map(add_no_top_level_ignore_empty_value, items)
items = map(add_false_top_level_keep_parent, items)
items = map(add_new_config_if_suitable, items)
items = list(items)


# print summary
selected = items.copy()
print("                                      total  selected")
print("                                  --------- ---------")
print("*.yml.hbs                         %9d %9d" % (
    len(items), len(selected)
))
selected = [i for i in selected if len(i["split_config"]) > 0]
print("has split_config:                 %9d %9d" % (
    sum(len(i["split_config"]) > 0 for i in items), len(selected)
))
selected = [i for i in selected if i["not_templated"]]
print("not_templated:                    %9d %9d" % (
    sum(i["not_templated"] for i in items), len(selected)
))
selected = [i for i in selected if i["is_list_type"]]
print("is_list_type:                     %9d %9d" % (
    sum(i["is_list_type"] for i in items), len(selected)
))
selected = [i for i in selected if i["no_top_level_ignore_empty_value"]]
print("no_top_level_ignore_empty_value:  %9d %9d" % (
    sum(i["no_top_level_ignore_empty_value"] for i in items), len(selected)
))
selected = [i for i in selected if i["false_top_level_keep_parent"]]
print("false_top_level_keep_parent:      %9d %9d" % (
    sum(i["false_top_level_keep_parent"] for i in items), len(selected)
))
selected = [i for i in selected if "new_config" in i]
print("new_config:                       %9d %9d" % (
    sum("new_config" in i for i in items), len(selected)
))
print()


# # print details
# for item in items:
#     if (
#         len(item["split_config"]) > 0 and \
#         item["not_templated"] and \
#         item["is_list_type"] and \
#         item["no_top_level_ignore_empty_value"] and \
#         item["false_top_level_keep_parent"] and \
#         "new_config" in item and \
#         True
#     ):
#         print(item["file"])
#         print()
#         print("".join(item["split_config"]))
#         print(item.get("new_config"))
#         print()

# # apply changes
# for item in items:
#     if "new_config" in item:
#         with open(item["file"], 'r', encoding='utf-8') as f:
#             content = f.read()
#         updated_content = content.replace("".join(item["split_config"]), item["new_config"])
#         with open(item["file"], 'w', encoding='utf-8') as f:
#             f.write(updated_content)
#         print("%s updated" % item["file"])

It's summary was:

                                      total  selected
                                  --------- ---------
*.yml.hbs                              1061      1061
has split_config:                       186       186
not_templated:                         1060       185
is_list_type:                           133       133
no_top_level_ignore_empty_value:       1032       105
false_top_level_keep_parent:           1057       101
new_config:                             101       101

I left out the AWS Security Hub changes already handled in #9705.

Some minor formatting changes are included.

Checklist

  • I have reviewed tips for building integrations and this pull request is aligned with them.
  • I have verified that all data streams collect metrics or logs.
  • I have added an entry to my package's changelog.yml file.
  • I have verified that Kibana version constraints are current according to guidelines.

Related issues

@chrisberkhout chrisberkhout added enhancement New feature or request Team:Security-Service Integrations Security Service Integrations team [elastic/security-service-integrations] labels May 24, 2024
@chrisberkhout chrisberkhout self-assigned this May 24, 2024
@chrisberkhout chrisberkhout requested a review from a team as a code owner May 24, 2024 08:36
@elasticmachine
Copy link

Pinging @elastic/security-service-integrations (Team:Security-Service Integrations)

@elasticmachine
Copy link

🚀 Benchmarks report

To see the full report comment with /test benchmark fullreport

@chrisberkhout
Copy link
Contributor Author

@efd6 This is the same as #9815, except only for SSI-only integrations and with version bumps and changelog updates. The commits separate out and revert the non-SSI changes.

@chrisberkhout chrisberkhout requested a review from efd6 May 24, 2024 09:45
@chrisberkhout chrisberkhout merged commit 6595903 into elastic:main May 27, 2024
Copy link

Quality Gate passed Quality Gate passed

Issues
0 New issues
0 Fixed issues
0 Accepted issues

Measures
0 Security Hotspots
No data about Coverage
No data about Duplication

See analysis details on SonarQube

@elasticmachine
Copy link

💚 Build Succeeded

History

cc @chrisberkhout

@chrisberkhout chrisberkhout deleted the split-ignore-empty-ssi branch May 27, 2024 09:08
@chrisberkhout chrisberkhout restored the split-ignore-empty-ssi branch May 27, 2024 09:08
@elasticmachine
Copy link

Package 1password - 1.28.0 containing this change is available at https://epr.elastic.co/search?package=1password

@elasticmachine
Copy link

Package atlassian_jira - 1.25.0 containing this change is available at https://epr.elastic.co/search?package=atlassian_jira

@elasticmachine
Copy link

Package bitwarden - 1.12.0 containing this change is available at https://epr.elastic.co/search?package=bitwarden

@elasticmachine
Copy link

Package carbon_black_cloud - 2.1.0 containing this change is available at https://epr.elastic.co/search?package=carbon_black_cloud

@elasticmachine
Copy link

Package cisa_kevs - 1.1.0 containing this change is available at https://epr.elastic.co/search?package=cisa_kevs

@elasticmachine
Copy link

Package cloudflare - 2.26.0 containing this change is available at https://epr.elastic.co/search?package=cloudflare

@elasticmachine
Copy link

Package forgerock - 1.16.0 containing this change is available at https://epr.elastic.co/search?package=forgerock

@elasticmachine
Copy link

Package google_scc - 1.3.0 containing this change is available at https://epr.elastic.co/search?package=google_scc

@elasticmachine
Copy link

Package ti_cybersixgill - 1.28.0 containing this change is available at https://epr.elastic.co/search?package=ti_cybersixgill

@elasticmachine
Copy link

Package ti_eset - 1.1.0 containing this change is available at https://epr.elastic.co/search?package=ti_eset

@elasticmachine
Copy link

Package ti_mandiant_advantage - 1.2.0 containing this change is available at https://epr.elastic.co/search?package=ti_mandiant_advantage

@elasticmachine
Copy link

Package ti_misp - 1.33.0 containing this change is available at https://epr.elastic.co/search?package=ti_misp

@elasticmachine
Copy link

Package ti_rapid7_threat_command - 1.16.0 containing this change is available at https://epr.elastic.co/search?package=ti_rapid7_threat_command

@elasticmachine
Copy link

Package ti_threatq - 1.27.0 containing this change is available at https://epr.elastic.co/search?package=ti_threatq

@elasticmachine
Copy link

Package trend_micro_vision_one - 1.18.0 containing this change is available at https://epr.elastic.co/search?package=trend_micro_vision_one

@elasticmachine
Copy link

Package zerofox - 1.24.0 containing this change is available at https://epr.elastic.co/search?package=zerofox

@elasticmachine
Copy link

Package zeronetworks - 1.14.0 containing this change is available at https://epr.elastic.co/search?package=zeronetworks

@andrewkroh andrewkroh added Integration:cisa_kevs CISA Known Exploited Vulnerabilities (Community supported) Integration:forgerock ForgeRock Integration:microsoft_exchange_online_message_trac Microsoft Exchange Online Message Trace Integration:panw_cortex_xdr Palo Alto Cortex XDR Integration:rapid7_insightvm Rapid7 InsightVM Integration:sentinel_one SentinelOne Integration:mimecast Mimecast (Partner supported) Integration:ping_one PingOne Integration:proofpoint_tap Proofpoint TAP Integration:1password 1Password (Partner supported) Integration:cloudflare Cloudflare (Community supported) Integration:google_scc Google Security Command Center Integration:infoblox_bloxone_ddi Infoblox BloxOne DDI Integration:m365_defender Microsoft Defender XDR Integration:atlassian_jira Atlassian Jira (Community supported) Integration:bitwarden Bitwarden Integration:carbon_black_cloud VMware Carbon Black Cloud Integration:google_workspace Google Workspace Integration:lumos Lumos (Partner supported) labels Jul 22, 2024
@chrisberkhout chrisberkhout deleted the split-ignore-empty-ssi branch September 13, 2024 12:17
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request Integration:atlassian_jira Atlassian Jira (Community supported) Integration:bitwarden Bitwarden Integration:carbon_black_cloud VMware Carbon Black Cloud Integration:cisa_kevs CISA Known Exploited Vulnerabilities (Community supported) Integration:cloudflare Cloudflare (Community supported) Integration:forgerock ForgeRock Integration:google_scc Google Security Command Center Integration:google_workspace Google Workspace Integration:infoblox_bloxone_ddi Infoblox BloxOne DDI Integration:lumos Lumos (Partner supported) Integration:m365_defender Microsoft Defender XDR Integration:microsoft_exchange_online_message_trac Microsoft Exchange Online Message Trace Integration:mimecast Mimecast (Partner supported) Integration:panw_cortex_xdr Palo Alto Cortex XDR Integration:ping_one PingOne Integration:proofpoint_tap Proofpoint TAP Integration:rapid7_insightvm Rapid7 InsightVM Integration:sentinel_one SentinelOne Integration:1password 1Password (Partner supported) Team:Security-Service Integrations Security Service Integrations team [elastic/security-service-integrations]
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[Proofpoint TAP] Empty array response should not generate an event
4 participants