From 1cd440f7db63543ea385d183f77be08829bc5aa9 Mon Sep 17 00:00:00 2001 From: Jeffrey Aven Date: Fri, 14 Feb 2025 11:08:09 +1100 Subject: [PATCH 1/2] Removed virtual environment from repo --- .gitignore | 1 + README.md | 6 +- .../databricks/all-purpose-cluster/README.md | 28 ++-- .../all-purpose-cluster/convenience.sh | 6 +- .../iam/cross_account_role.iql} | 0 .../s3/workspace_bucket.iql} | 0 .../s3/workspace_bucket_policy.iql} | 0 .../resources/aws/vpc/elastic_ip.iql | 56 ++++++++ .../vpc/get_main_route_table_id.iql} | 0 .../vpc/inet_gateway.iql} | 0 .../vpc/inet_gw_attachment.iql} | 0 .../vpc/inet_route.iql} | 0 .../resources/aws/vpc/nat_gateway.iql | 53 +++++++ .../vpc/security_group.iql} | 0 .../vpc/security_group_rules.iql} | 0 .../vpc/subnet.iql} | 0 .../vpc/subnet_rt_assn.iql} | 0 .../vpc/tag_main_vpc_route_table.iql} | 0 .../{aws_vpc.iql => aws/vpc/vpc.iql} | 0 .../credentials.iql} | 0 .../databricks_account/get_users.iql | 6 + .../network.iql} | 0 .../storage_configuration.iql} | 0 .../update_group_membership.iql | 6 + .../workspace.iql} | 0 .../databricks_account/workspace_group.iql | 31 +++++ .../workspace_permission_assignments.iql | 32 +++++ .../all_purpose_cluster.iql | 52 +++++++ .../all-purpose-cluster/stackql_manifest.yml | 130 ++++++++++++++---- stackql_deploy/cli.py | 1 + stackql_deploy/cmd/base.py | 1 + stackql_deploy/cmd/build.py | 1 + stackql_deploy/cmd/teardown.py | 1 + stackql_deploy/cmd/test.py | 1 + stackql_deploy/lib/bootstrap.py | 1 + stackql_deploy/lib/config.py | 23 ++++ stackql_deploy/lib/templating.py | 1 + stackql_deploy/lib/utils.py | 1 + 38 files changed, 392 insertions(+), 46 deletions(-) rename examples/databricks/all-purpose-cluster/resources/{aws_iam_cross_account_role.iql => aws/iam/cross_account_role.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_s3_workspace_bucket.iql => aws/s3/workspace_bucket.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_s3_workspace_bucket_policy.iql => aws/s3/workspace_bucket_policy.iql} (100%) create mode 100644 examples/databricks/all-purpose-cluster/resources/aws/vpc/elastic_ip.iql rename examples/databricks/all-purpose-cluster/resources/{aws_get_main_route_table_id.iql => aws/vpc/get_main_route_table_id.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_inet_gateway.iql => aws/vpc/inet_gateway.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_inet_gw_attachment.iql => aws/vpc/inet_gw_attachment.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_inet_route.iql => aws/vpc/inet_route.iql} (100%) create mode 100644 examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_security_group.iql => aws/vpc/security_group.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_security_group_rules.iql => aws/vpc/security_group_rules.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_subnet.iql => aws/vpc/subnet.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc_subnet_rt_assn.iql => aws/vpc/subnet_rt_assn.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_tag_main_vpc_route_table.iql => aws/vpc/tag_main_vpc_route_table.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{aws_vpc.iql => aws/vpc/vpc.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{databricks_credentials.iql => 
databricks_account/credentials.iql} (100%) create mode 100644 examples/databricks/all-purpose-cluster/resources/databricks_account/get_users.iql rename examples/databricks/all-purpose-cluster/resources/{databricks_network.iql => databricks_account/network.iql} (100%) rename examples/databricks/all-purpose-cluster/resources/{databricks_storage_configuration.iql => databricks_account/storage_configuration.iql} (100%) create mode 100644 examples/databricks/all-purpose-cluster/resources/databricks_account/update_group_membership.iql rename examples/databricks/all-purpose-cluster/resources/{databricks_workspace.iql => databricks_account/workspace.iql} (100%) create mode 100644 examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_group.iql create mode 100644 examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_permission_assignments.iql create mode 100644 examples/databricks/all-purpose-cluster/resources/databricks_workspace/all_purpose_cluster.iql diff --git a/.gitignore b/.gitignore index 8c87ce0..213615b 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,4 @@ instance/ docs/_build/ .DS_Store +myenv/ diff --git a/README.md b/README.md index c295d43..15065dd 100644 --- a/README.md +++ b/README.md @@ -265,14 +265,16 @@ To distribute **stackql-deploy** on PyPI, you'll need to ensure that you have al First, ensure you have the latest versions of `setuptools` and `wheel` installed: -``` +```bash +python3 -m venv venv +source venv/bin/activate # pip install --upgrade setuptools wheel pip install --upgrade build ``` Then, navigate to your project root directory and build the distribution files: -``` +```bash rm dist/stackql_deploy* python3 -m build # or diff --git a/examples/databricks/all-purpose-cluster/README.md b/examples/databricks/all-purpose-cluster/README.md index 6a80f71..404f7bc 100644 --- a/examples/databricks/all-purpose-cluster/README.md +++ b/examples/databricks/all-purpose-cluster/README.md @@ -26,7 +26,7 @@ Now, it is convenient to use environment variables for context. Note that for o ```bash #!/usr/bin/env bash -export ASSETS_AWS_REGION='us-east-1' # or wherever you want +export AWS_REGION='us-east-1' # or wherever you want export AWS_ACCOUNT_ID='' export DATABRICKS_ACCOUNT_ID='' export DATABRICKS_AWS_ACCOUNT_ID='' @@ -46,28 +46,20 @@ export AWS_ACCESS_KEY_ID='' Now, let us do some sanity checks and housekeeping with `stackql`. This is purely optional. From the root of this repository: ``` - source examples/databricks/all-purpose-cluster/convenience.sh - stackql shell - ``` This will start a `stackql` interactive shell. Here are some commands you can run (I will not place output here; that will be shared in a corresponding video): ```sql - registry pull databricks_account v24.12.00279; - registry pull databricks_workspace v24.12.00279; -- This will fail if accounts, subscription, or credentials are in error. select account_id FROM databricks_account.provisioning.credentials WHERE account_id = ''; - - select account_id, workspace_name, workspace_id, workspace_status from databricks_account.provisioning.workspaces where account_id = ''; - ``` For extra credit, you can (asynchronously) delete the unnecessary workspace with `delete from databricks_account.provisioning.workspaces where account_id = '' and workspace_id = '';`, where you obtain the workspace id from the above query.
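For example, here is a minimal sketch of that two-step flow, using hypothetical `<your-account-id>` and `<workspace-id-from-above>` placeholders that you would substitute with your own values:

```sql
-- list workspaces in the account to find the workspace id to remove
select workspace_id, workspace_name
from databricks_account.provisioning.workspaces
where account_id = '<your-account-id>';

-- then issue the (asynchronous) delete using that workspace id
delete from databricks_account.provisioning.workspaces
where account_id = '<your-account-id>'
and workspace_id = '<workspace-id-from-above>';
```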
I have noted that due to some response caching it takes a while to disappear from select queries (much longer than disappearance from the web page), and you may want to bounce the `stackql` session to hurry things along. This is not happening on the `stackql` side, but session bouncing forces a token refresh which can help with cache busting. @@ -77,20 +69,20 @@ For extra credit, you can (asynchronously) delete the unnecessary workspace with Time to get down to business. From the root of this repository: ```bash - +python3 -m venv myenv source examples/databricks/all-purpose-cluster/convenience.sh - -source ./.venv/bin/activate - - +source myenv/bin/activate +pip install stackql-deploy ``` +> Alternatively, set the `AWS_REGION`, `AWS_ACCOUNT_ID`, `DATABRICKS_ACCOUNT_ID`, and `DATABRICKS_AWS_ACCOUNT_ID` variables directly, along with the provider credentials `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `DATABRICKS_CLIENT_ID`, and `DATABRICKS_CLIENT_SECRET`. + Then, do a dry run (good for catching **some** environmental issues): ```bash stackql-deploy build \ examples/databricks/all-purpose-cluster dev \ --e AWS_REGION=${ASSETS_AWS_REGION} \ +-e AWS_REGION=${AWS_REGION} \ -e AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} \ -e DATABRICKS_ACCOUNT_ID=${DATABRICKS_ACCOUNT_ID} \ -e DATABRICKS_AWS_ACCOUNT_ID=${DATABRICKS_AWS_ACCOUNT_ID} \ @@ -105,7 +97,7 @@ Now, let us do it for real: ```bash stackql-deploy build \ examples/databricks/all-purpose-cluster dev \ --e AWS_REGION=${ASSETS_AWS_REGION} \ +-e AWS_REGION=${AWS_REGION} \ -e AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} \ -e DATABRICKS_ACCOUNT_ID=${DATABRICKS_ACCOUNT_ID} \ -e DATABRICKS_AWS_ACCOUNT_ID=${DATABRICKS_AWS_ACCOUNT_ID} \ @@ -128,7 +120,7 @@ We can also use `stackql-deploy` to assess if our infra is shipshape: ```bash stackql-deploy test \ examples/databricks/all-purpose-cluster dev \ --e AWS_REGION=${ASSETS_AWS_REGION} \ +-e AWS_REGION=${AWS_REGION} \ -e AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} \ -e DATABRICKS_ACCOUNT_ID=${DATABRICKS_ACCOUNT_ID} \ -e DATABRICKS_AWS_ACCOUNT_ID=${DATABRICKS_AWS_ACCOUNT_ID} \ @@ -151,7 +143,7 @@ Now, let us tear down our `stackql-deploy` managed infra: ```bash stackql-deploy teardown \ examples/databricks/all-purpose-cluster dev \ --e AWS_REGION=${ASSETS_AWS_REGION} \ +-e AWS_REGION=${AWS_REGION} \ -e AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} \ -e DATABRICKS_ACCOUNT_ID=${DATABRICKS_ACCOUNT_ID} \ -e DATABRICKS_AWS_ACCOUNT_ID=${DATABRICKS_AWS_ACCOUNT_ID} \ diff --git a/examples/databricks/all-purpose-cluster/convenience.sh b/examples/databricks/all-purpose-cluster/convenience.sh index 81f73c7..d4913f6 100644 --- a/examples/databricks/all-purpose-cluster/convenience.sh +++ b/examples/databricks/all-purpose-cluster/convenience.sh @@ -10,9 +10,9 @@ then source "${REPOSITORY_ROOT}/examples/databricks/all-purpose-cluster/sec/env.sh" fi -if [ "${ASSETS_AWS_REGION}" = "" ]; +if [ "${AWS_REGION}" = "" ]; then - ASSETS_AWS_REGION='us-east-1' + AWS_REGION='us-east-1' fi if [ "${AWS_ACCOUNT_ID}" = "" ]; @@ -57,7 +57,7 @@ then exit 1 fi -export ASSETS_AWS_REGION +export AWS_REGION export AWS_ACCOUNT_ID export DATABRICKS_ACCOUNT_ID export DATABRICKS_AWS_ACCOUNT_ID diff --git a/examples/databricks/all-purpose-cluster/resources/aws_iam_cross_account_role.iql b/examples/databricks/all-purpose-cluster/resources/aws/iam/cross_account_role.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_iam_cross_account_role.iql rename to examples/databricks/all-purpose-cluster/resources/aws/iam/cross_account_role.iql diff --git
a/examples/databricks/all-purpose-cluster/resources/aws_s3_workspace_bucket.iql b/examples/databricks/all-purpose-cluster/resources/aws/s3/workspace_bucket.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_s3_workspace_bucket.iql rename to examples/databricks/all-purpose-cluster/resources/aws/s3/workspace_bucket.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_s3_workspace_bucket_policy.iql b/examples/databricks/all-purpose-cluster/resources/aws/s3/workspace_bucket_policy.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_s3_workspace_bucket_policy.iql rename to examples/databricks/all-purpose-cluster/resources/aws/s3/workspace_bucket_policy.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/elastic_ip.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/elastic_ip.iql new file mode 100644 index 0000000..d4dd982 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/elastic_ip.iql @@ -0,0 +1,56 @@ +/*+ exists */ +SELECT COUNT(*) as count FROM +( +SELECT allocation_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.eip_tags +WHERE region = '{{ region }}' +GROUP BY allocation_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t + +/*+ create */ +INSERT INTO aws.ec2.eips ( + NetworkBorderGroup, + Tags, + ClientToken, + region +) +SELECT +'{{ region }}', +'{{ tags }}', +'{{ idempotency_token }}', +'{{ region }}' + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count FROM +( +SELECT allocation_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.eip_tags +WHERE region = '{{ region }}' +GROUP BY allocation_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t + +/*+ exports, retries=3, retry_delay=5 */ +SELECT allocation_id as eip_allocation_id, public_ip as eip_public_id FROM +( +SELECT allocation_id, public_ip, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.eip_tags +WHERE region = '{{ region }}' +GROUP BY allocation_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t + +/*+ delete */ +DELETE FROM aws.ec2.eips +WHERE data__Identifier = '{{ eip_public_id }}|{{ eip_allocation_id}}' +AND region = '{{ region }}' diff --git a/examples/databricks/all-purpose-cluster/resources/aws_get_main_route_table_id.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_get_main_route_table_id.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_gateway.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_gateway.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_gateway.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_gateway.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_gw_attachment.iql 
b/examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_gw_attachment.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_gw_attachment.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_gw_attachment.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_route.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_route.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_inet_route.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/inet_route.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql new file mode 100644 index 0000000..27c7136 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql @@ -0,0 +1,53 @@ +/*+ exists */ +SELECT COUNT(*) as count FROM +( +SELECT nat_gateway_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.nat_gateway_tags +WHERE region = '{{ region }}' +GROUP BY nat_gateway_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t + +/*+ create */ +INSERT INTO aws.ec2.nat_gateways ( + AllocationId, + SubnetId, + Tags, + region +) +SELECT + '{{ eip_allocation_id }}', + '{{ aws_vpc_subnet1_id }}', + '{{ tags }}', + '{{ region }}'; + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count FROM +( +SELECT nat_gateway_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.nat_gateway_tags +WHERE region = '{{ region }}' +GROUP BY nat_gateway_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t + +/*+ exports, retries=3, retry_delay=5 */ +SELECT nat_gateway_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.nat_gateway_tags +WHERE region = '{{ region }}' +GROUP BY nat_gateway_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' + +/*+ delete */ +DELETE FROM aws.ec2.nat_gateways +WHERE data__Identifier = '{{ nat_gateway_id }}' +AND region = '{{ region }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_security_group.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/security_group.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_security_group.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/security_group.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_security_group_rules.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/security_group_rules.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_security_group_rules.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/security_group_rules.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_subnet.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/subnet.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_subnet.iql rename to
examples/databricks/all-purpose-cluster/resources/aws/vpc/subnet.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc_subnet_rt_assn.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/subnet_rt_assn.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc_subnet_rt_assn.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/subnet_rt_assn.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_tag_main_vpc_route_table.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_tag_main_vpc_route_table.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws_vpc.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/aws_vpc.iql rename to examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc.iql diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_credentials.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/credentials.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/databricks_credentials.iql rename to examples/databricks/all-purpose-cluster/resources/databricks_account/credentials.iql diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_account/get_users.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/get_users.iql new file mode 100644 index 0000000..2a978d7 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/databricks_account/get_users.iql @@ -0,0 +1,6 @@ +/*+ exports, retries=3, retry_delay=5 */ +SELECT +JSON_GROUP_ARRAY(JSON_OBJECT('value', id)) as databricks_workspace_group_members +FROM databricks_account.iam.users +WHERE account_id = '{{ databricks_account_id }}' +AND userName in {{ users | sql_list }}; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_network.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/network.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/databricks_network.iql rename to examples/databricks/all-purpose-cluster/resources/databricks_account/network.iql diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_storage_configuration.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/storage_configuration.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/databricks_storage_configuration.iql rename to examples/databricks/all-purpose-cluster/resources/databricks_account/storage_configuration.iql diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_account/update_group_membership.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/update_group_membership.iql new file mode 100644 index 0000000..375d926 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/databricks_account/update_group_membership.iql @@ -0,0 +1,6 @@ +/*+ command */ +update databricks_account.iam.groups +set data__schemas = '["urn:ietf:params:scim:api:messages:2.0:PatchOp"]', +data__Operations = '[{"op": "replace", "path": "members", "value": {{
databricks_workspace_group_members }} }]' +WHERE account_id = '{{ databricks_account_id }}' +AND id = '{{ databricks_group_id }}'; diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_workspace.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql similarity index 100% rename from examples/databricks/all-purpose-cluster/resources/databricks_workspace.iql rename to examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_group.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_group.iql new file mode 100644 index 0000000..4d3494a --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_group.iql @@ -0,0 +1,31 @@ +/*+ exists */ +SELECT COUNT(*) as count +FROM databricks_account.iam.groups +WHERE account_id = '{{ databricks_account_id }}' +AND displayName = '{{ display_name }}' + +/*+ create */ +INSERT INTO databricks_account.iam.groups ( +account_id, +data__displayName +) +SELECT +'{{ databricks_account_id }}', +'{{ display_name }}' + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count +FROM databricks_account.iam.groups +WHERE account_id = '{{ databricks_account_id }}' +AND displayName = '{{ display_name }}' + +/*+ exports */ +SELECT id AS databricks_group_id +FROM databricks_account.iam.groups +WHERE account_id = '{{ databricks_account_id }}' +AND displayName = '{{ display_name }}' + +/*+ delete */ +DELETE FROM databricks_account.iam.groups +WHERE account_id = '{{ databricks_account_id }}' AND +id = '{{ databricks_group_id }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_permission_assignments.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_permission_assignments.iql new file mode 100644 index 0000000..00387e3 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace_permission_assignments.iql @@ -0,0 +1,32 @@ +/*+ exists */ +SELECT COUNT(*) as count +FROM databricks_account.iam.workspace_permission_assignments +WHERE account_id = '{{ databricks_account_id }}' AND +workspace_id = '{{ databricks_workspace_id }}' +AND JSON_EXTRACT(principal, '$.principal_id') = {{ databricks_group_id }} + +/*+ createorupdate */ +INSERT INTO databricks_account.iam.workspace_permission_assignments ( +account_id, +principal_id, +workspace_id, +data__permissions +) +SELECT +'{{ databricks_account_id }}', +'{{ databricks_group_id }}', +'{{ databricks_workspace_id }}', +'["ADMIN"]' + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count +FROM databricks_account.iam.workspace_permission_assignments +WHERE account_id = '{{ databricks_account_id }}' AND +workspace_id = '{{ databricks_workspace_id }}' +AND JSON_EXTRACT(principal, '$.principal_id') = {{ databricks_group_id }} + +/*+ delete */ +DELETE FROM databricks_account.iam.workspace_permission_assignments +WHERE account_id = '{{ databricks_account_id }}' AND +principal_id = '{{ databricks_group_id }}' AND +workspace_id = '{{ databricks_workspace_id }}' \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/databricks_workspace/all_purpose_cluster.iql b/examples/databricks/all-purpose-cluster/resources/databricks_workspace/all_purpose_cluster.iql new file mode 100644 index 0000000..44b3703 --- 
/dev/null +++ b/examples/databricks/all-purpose-cluster/resources/databricks_workspace/all_purpose_cluster.iql @@ -0,0 +1,52 @@ +/*+ exists */ +SELECT COUNT(*) as count +FROM databricks_workspace.compute.clusters +WHERE deployment_name = '{{ databricks_deployment_name }}' +AND cluster_name = '{{ cluster_name }}' + +/*+ create */ +INSERT INTO databricks_workspace.compute.clusters ( +deployment_name, +data__cluster_name, +data__num_workers, +data__is_single_node, +data__kind, +data__spark_version, +data__node_type_id, +data__data_security_mode, +data__runtime_engine, +data__single_user_name, +data__aws_attributes, +data__custom_tags +) +SELECT +'{{ databricks_deployment_name }}', +'{{ cluster_name }}', + {{ num_workers }}, + {{ is_single_node }}, +'{{ kind }}', +'{{ spark_version }}', +'{{ node_type_id }}', +'{{ data_security_mode }}', +'{{ runtime_engine }}', +'{{ single_user_name }}', +'{{ aws_attributes }}', +'{{ custom_tags }}' + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count +FROM databricks_workspace.compute.clusters +WHERE deployment_name = '{{ databricks_deployment_name }}' +AND cluster_name = '{{ cluster_name }}' + +/*+ exports */ +SELECT cluster_id AS databricks_cluster_id, +state AS databricks_cluster_state +FROM databricks_workspace.compute.clusters +WHERE deployment_name = '{{ databricks_deployment_name }}' +AND cluster_name = '{{ cluster_name }}' + +/*+ delete */ +DELETE FROM databricks_workspace.compute.clusters +WHERE deployment_name = '{{ databricks_deployment_name }}' +AND cluster_id = '{{ databricks_cluster_id }}' diff --git a/examples/databricks/all-purpose-cluster/stackql_manifest.yml b/examples/databricks/all-purpose-cluster/stackql_manifest.yml index 4d1d463..1e6b99d 100644 --- a/examples/databricks/all-purpose-cluster/stackql_manifest.yml +++ b/examples/databricks/all-purpose-cluster/stackql_manifest.yml @@ -30,7 +30,7 @@ resources: # ==================================================================================== # AWS IAM # ==================================================================================== - - name: aws_iam_cross_account_role + - name: aws/iam/cross_account_role props: - name: role_name value: "{{ stack_name }}-{{ stack_env }}-role" @@ -137,7 +137,7 @@ resources: PolicyName: "{{ stack_name }}-{{ stack_env }}-policy" exports: - aws_iam_cross_account_role_arn - - name: databricks_credentials + - name: databricks_account/credentials props: - name: credentials_name value: "{{ stack_name }}-{{ stack_env }}-credentials" @@ -151,7 +151,7 @@ resources: # ==================================================================================== # AWS VPC Networking # ==================================================================================== - - name: aws_vpc + - name: aws/vpc/vpc props: - name: cidr_block values: @@ -171,11 +171,11 @@ resources: value: 019447a0-b84a-7b7f-bca5-2ee320207e51 exports: - vpc_id - - name: aws_get_main_route_table_id + - name: aws/vpc/get_main_route_table_id type: query exports: - route_table_id - - name: aws_tag_main_vpc_route_table + - name: aws/vpc/tag_main_vpc_route_table type: command props: - name: tags @@ -183,8 +183,8 @@ resources: - Key: Name Value: "{{ stack_name }}-{{ stack_env }}-route-table" merge: ['global_tags'] - - name: aws_vpc_subnet1 - file: aws_vpc_subnet.iql + - name: aws/vpc/subnet1 + file: aws/vpc/subnet.iql props: - name: availability_zone value: "us-east-1a" @@ -204,8 +204,8 @@ resources: - global_tags exports: - subnet_id: aws_vpc_subnet1_id - - name: aws_vpc_subnet2 - 
file: aws_vpc_subnet.iql + - name: aws/vpc/subnet2 + file: aws/vpc/subnet.iql props: - name: availability_zone value: "us-east-1b" @@ -225,7 +225,7 @@ resources: - global_tags exports: - subnet_id: aws_vpc_subnet2_id - - name: aws_vpc_inet_gateway + - name: aws/vpc/inet_gateway props: - name: tags value: @@ -236,10 +236,10 @@ resources: value: 019447a5-f076-75f8-9173-092df5a66d35 exports: - internet_gateway_id - - name: aws_vpc_inet_gw_attachment + - name: aws/vpc/inet_gw_attachment props: [] - - name: aws_vpc_subnet_rt_assn1 - file: aws_vpc_subnet_rt_assn.iql + - name: aws/vpc/subnet_rt_assn1 + file: aws/vpc/subnet_rt_assn.iql props: - name: subnet_id value: "{{ aws_vpc_subnet1_id }}" @@ -247,8 +247,8 @@ resources: value: 019447aa-1c7a-775b-91dc-04db7c49f4a7 exports: - route_table_assn_id: aws_vpc_subnet1_rt_assn_id - - name: aws_vpc_subnet_rt_assn2 - file: aws_vpc_subnet_rt_assn.iql + - name: aws/vpc/subnet_rt_assn2 + file: aws/vpc/subnet_rt_assn.iql props: - name: subnet_id value: "{{ aws_vpc_subnet2_id }}" @@ -256,11 +256,34 @@ resources: value: 019447ab-1302-754a-a580-99071f1ad814 exports: - route_table_assn_id: aws_vpc_subnet2_rt_assn_id - - name: aws_vpc_inet_route + - name: aws/vpc/inet_route props: [] exports: - inet_route_indentifer - - name: aws_vpc_security_group + - name: aws/vpc/elastic_ip + props: + - name: tags + value: + - Key: Name + Value: "{{ stack_name }}-{{ stack_env }}-eip" + merge: ['global_tags'] + - name: idempotency_token + value: 01945908-b80d-7e51-b52c-5e93dea9cbdb + exports: + - eip_allocation_id + - eip_public_id + - name: aws/vpc/nat_gateway + props: + - name: tags + value: + - Key: Name + Value: "{{ stack_name }}-{{ stack_env }}-nat-gateway" + merge: ['global_tags'] + - name: idempotency_token + value: 019447a5-f076-75f8-9173-092df5a66d35 + exports: + - nat_gateway_id + - name: aws/vpc/security_group props: - name: group_name value: "{{ stack_name }}-{{ stack_env }}-sg" @@ -273,7 +296,7 @@ resources: merge: ['global_tags'] exports: - security_group_id - - name: aws_vpc_security_group_rules + - name: aws/vpc/security_group_rules props: - name: security_group_ingress value: @@ -302,7 +325,7 @@ resources: FromPort: -1 ToPort: -1 IpProtocol: "-1" - - name: databricks_network + - name: databricks_account/network props: - name: databricks_network_name value: "{{ stack_name }}-{{ stack_env }}-network" @@ -318,7 +341,7 @@ resources: # ==================================================================================== # AWS Storage # ==================================================================================== - - name: aws_s3_workspace_bucket + - name: aws/s3/workspace_bucket props: - name: bucket_name value: "{{ stack_name }}-{{ stack_env }}-root-bucket" @@ -344,7 +367,7 @@ resources: exports: - aws_s3_workspace_bucket_name - aws_s3_workspace_bucket_arn - - name: aws_s3_workspace_bucket_policy + - name: aws/s3/workspace_bucket_policy props: - name: policy_document value: @@ -364,7 +387,7 @@ resources: Resource: - "{{ aws_s3_workspace_bucket_arn }}/*" - "{{ aws_s3_workspace_bucket_arn }}" - - name: databricks_storage_configuration + - name: databricks_account/storage_configuration props: - name: storage_configuration_name value: "{{ stack_name }}-{{ stack_env }}-storage" @@ -376,7 +399,7 @@ resources: # ==================================================================================== # DBX Workspace # ==================================================================================== - - name: databricks_workspace + - name: databricks_account/workspace 
props: - name: workspace_name value: "{{ stack_name }}-{{ stack_env }}-workspace" @@ -392,3 +415,64 @@ resources: value: PREMIUM exports: - databricks_workspace_id + - name: databricks_account/workspace_group + props: + - name: display_name + value: "{{ stack_name }}-{{ stack_env }}-workspace-admins" + exports: + - databricks_group_id + - name: databricks_account/get_users + type: query + props: + - name: users + value: + - "javen@stackql.io" + - "krimmer@stackql.io" + exports: + - databricks_workspace_group_members + - name: databricks_account/update_group_membership + type: command + props: [] + - name: databricks_account/workspace_permission_assignments + props: [] + - name: databricks_workspace/all_purpose_cluster + props: + - name: cluster_name + value: single-user-single-node-cluster + - name: num_workers + value: 0 + - name: is_single_node + value: true + - name: kind + value: CLASSIC_PREVIEW + - name: spark_version + value: 15.4.x-scala2.12 + - name: node_type_id + value: m7g.large + - name: data_security_mode + value: SINGLE_USER + - name: runtime_engine + value: PHOTON + - name: single_user_name + value: javen@stackql.io + - name: aws_attributes + value: + ebs_volume_count: 1 + ebs_volume_size: 100 + - name: custom_tags + description: Additional tags for cluster resources (max 45 tags) + value: + Provisioner: stackql + StackName: "{{ stack_name }}" + StackEnv: "{{ stack_env }}" + exports: + - databricks_cluster_id + - databricks_cluster_state + + # "spark_conf": { + # "spark.databricks.cluster.profile": "singleNode", + # "spark.master": "local[*, 4]" + # }, + # "spark_env_vars": { + # "PYSPARK_PYTHON": "/databricks/python3/bin/python3" + # }, \ No newline at end of file diff --git a/stackql_deploy/cli.py b/stackql_deploy/cli.py index be2b275..11794a8 100644 --- a/stackql_deploy/cli.py +++ b/stackql_deploy/cli.py @@ -1,3 +1,4 @@ +# cli.py import click import os import sys diff --git a/stackql_deploy/cmd/base.py b/stackql_deploy/cmd/base.py index 6b38893..fda4343 100644 --- a/stackql_deploy/cmd/base.py +++ b/stackql_deploy/cmd/base.py @@ -1,3 +1,4 @@ +# cmd/base.py from ..lib.utils import ( perform_retries, run_stackql_command, diff --git a/stackql_deploy/cmd/build.py b/stackql_deploy/cmd/build.py index 1851ce1..c374586 100644 --- a/stackql_deploy/cmd/build.py +++ b/stackql_deploy/cmd/build.py @@ -1,3 +1,4 @@ +# cmd/build.py import datetime from ..lib.utils import ( catch_error_and_exit, diff --git a/stackql_deploy/cmd/teardown.py b/stackql_deploy/cmd/teardown.py index 6aeda1c..600cc85 100644 --- a/stackql_deploy/cmd/teardown.py +++ b/stackql_deploy/cmd/teardown.py @@ -1,3 +1,4 @@ +# cmd/teardown.py import datetime from ..lib.utils import ( catch_error_and_exit, diff --git a/stackql_deploy/cmd/test.py b/stackql_deploy/cmd/test.py index 35d4b12..dbd19b0 100644 --- a/stackql_deploy/cmd/test.py +++ b/stackql_deploy/cmd/test.py @@ -1,3 +1,4 @@ +# cmd/test.py import datetime from ..lib.utils import ( catch_error_and_exit, diff --git a/stackql_deploy/lib/bootstrap.py b/stackql_deploy/lib/bootstrap.py index d1b79b9..3ace615 100644 --- a/stackql_deploy/lib/bootstrap.py +++ b/stackql_deploy/lib/bootstrap.py @@ -1,3 +1,4 @@ +# lib/bootstrap.py import logging # Set up logging at the root level diff --git a/stackql_deploy/lib/config.py b/stackql_deploy/lib/config.py index 22f0fab..c6087e6 100644 --- a/stackql_deploy/lib/config.py +++ b/stackql_deploy/lib/config.py @@ -1,3 +1,4 @@ +# lib/config.py import os import yaml import json @@ -92,6 +93,27 @@ def generate_patch_document(properties): 
return json.dumps(patch_doc) +def sql_list(input_data): + # If the input is already a string representation of a list, parse it + if isinstance(input_data, str): + try: + import json + # Parse the string as JSON array + python_list = json.loads(input_data) + except json.JSONDecodeError: + # If it's not valid JSON, treat it as a single item + python_list = [input_data] + else: + python_list = input_data + + # Handle empty list case + if not python_list: + return '(NULL)' + + # Convert each item to string, wrap in quotes, join with commas + quoted_items = [f"'{str(item)}'" for item in python_list] + return f"({','.join(quoted_items)})" + # END jinja filters def to_sql_compatible_json(value): @@ -282,6 +304,7 @@ def setup_environment(stack_dir, logger): env.filters['base64_encode'] = base64_encode env.filters['generate_patch_document'] = generate_patch_document env.filters['from_json'] = from_json + env.filters['sql_list'] = sql_list env.globals['uuid'] = lambda: str(uuid.uuid4()) logger.debug("custom Jinja filters registered: %s", env.filters.keys()) return env diff --git a/stackql_deploy/lib/templating.py b/stackql_deploy/lib/templating.py index cc73ca8..85d0efb 100644 --- a/stackql_deploy/lib/templating.py +++ b/stackql_deploy/lib/templating.py @@ -1,3 +1,4 @@ +# lib/templating.py import json import os from .utils import catch_error_and_exit diff --git a/stackql_deploy/lib/utils.py b/stackql_deploy/lib/utils.py index 59d041a..a1a1b06 100644 --- a/stackql_deploy/lib/utils.py +++ b/stackql_deploy/lib/utils.py @@ -1,3 +1,4 @@ +# lib/utils.py import time import json import sys From c6bc6ef3e767eefbd34c9465bd66428eb5896238 Mon Sep 17 00:00:00 2001 From: Jeffrey Aven Date: Sat, 26 Apr 2025 13:29:46 +1000 Subject: [PATCH 2/2] fixed databricks example --- .../{cross_account_role.iql => iam_role.iql} | 2 +- .../aws/vpc/get_main_route_table_id.iql | 2 +- .../resources/aws/vpc/nat_gateway.iql | 2 +- .../resources/aws/vpc/nat_inet_route.iql | 41 +++ .../resources/aws/vpc/route_table.iql | 54 ++++ .../aws/vpc/tag_main_vpc_route_table.iql | 2 +- .../resources/aws/vpc/vpc_endpoint.iql | 60 ++++ .../databricks_account/workspace.iql | 6 +- .../all-purpose-cluster/stackql_manifest.yml | 293 +++++++++++++++--- 9 files changed, 413 insertions(+), 49 deletions(-) rename examples/databricks/all-purpose-cluster/resources/aws/iam/{cross_account_role.iql => iam_role.iql} (93%) create mode 100644 examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_inet_route.iql create mode 100644 examples/databricks/all-purpose-cluster/resources/aws/vpc/route_table.iql create mode 100644 examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc_endpoint.iql diff --git a/examples/databricks/all-purpose-cluster/resources/aws/iam/cross_account_role.iql b/examples/databricks/all-purpose-cluster/resources/aws/iam/iam_role.iql similarity index 93% rename from examples/databricks/all-purpose-cluster/resources/aws/iam/cross_account_role.iql rename to examples/databricks/all-purpose-cluster/resources/aws/iam/iam_role.iql index f7e8750..ba2d140 100644 --- a/examples/databricks/all-purpose-cluster/resources/aws/iam/cross_account_role.iql +++ b/examples/databricks/all-purpose-cluster/resources/aws/iam/iam_role.iql @@ -49,7 +49,7 @@ AND path = '{{ path }}'; /*+ exports, retries=3, retry_delay=5 */ SELECT -arn as aws_iam_cross_account_role_arn +arn as aws_iam_role_arn FROM aws.iam.roles WHERE data__Identifier = '{{ role_name }}' diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql 
b/examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql index 72595ff..7679dd2 100644 --- a/examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/get_main_route_table_id.iql @@ -1,6 +1,6 @@ /*+ exports, retries=3, retry_delay=5 */ SELECT -route_table_id +route_table_id as main_route_table_id FROM aws.ec2.route_tables WHERE region = '{{ region }}' AND vpc_id = '{{ vpc_id }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql index 27c7136..081fbd2 100644 --- a/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_gateway.iql @@ -20,7 +20,7 @@ INSERT INTO aws.ec2.nat_gateways ( ) SELECT '{{ eip_allocation_id }}', - '{{ aws_vpc_subnet1_id }}', + '{{ nat_subnet_id }}', '{{ tags }}', '{{ region }}'; diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_inet_route.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_inet_route.iql new file mode 100644 index 0000000..9e750f6 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/nat_inet_route.iql @@ -0,0 +1,41 @@ +/*+ exists */ +SELECT COUNT(*) as count FROM +( +SELECT data__Identifier +FROM aws.ec2.routes +WHERE region = '{{ region }}' +AND data__Identifier = '{{ route_table_id }}|0.0.0.0/0' +) t + +/*+ create */ +INSERT INTO aws.ec2.routes ( + DestinationCidrBlock, + NatGatewayId, + RouteTableId, + region +) +SELECT + '0.0.0.0/0', + '{{ nat_gateway_id }}', + '{{ route_table_id }}', + '{{ region }}'; + +/*+ statecheck, retries=5, retry_delay=5 */ +SELECT COUNT(*) as count FROM +( +SELECT data__Identifier +FROM aws.ec2.routes +WHERE region = '{{ region }}' +AND data__Identifier = '{{ route_table_id }}|0.0.0.0/0' +) t + +/*+ exports, retries=3, retry_delay=5 */ +SELECT data__Identifier as nat_inet_route_indentifer +FROM aws.ec2.routes +WHERE region = '{{ region }}' +AND data__Identifier = '{{ route_table_id }}|0.0.0.0/0'; + +/*+ delete */ +DELETE FROM aws.ec2.routes +WHERE data__Identifier = '{{ nat_inet_route_indentifer }}' +AND region = '{{ region }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/route_table.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/route_table.iql new file mode 100644 index 0000000..7b0aa76 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/route_table.iql @@ -0,0 +1,54 @@ +/*+ exists */ +SELECT COUNT(*) as count FROM +( +SELECT route_table_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.route_table_tags +WHERE region = '{{ region }}' +GROUP BY route_table_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +AND json_extract(tags, '$.Name') = '{{ route_table_name }}' +) t + +/*+ create */ +INSERT INTO aws.ec2.route_tables ( + VpcId, + Tags, + region +) +SELECT + '{{ vpc_id }}', + '{{ tags }}', + '{{ region }}'; + +/*+ statecheck, retries=3, retry_delay=5 */ +SELECT COUNT(*) as count FROM +( +SELECT route_table_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.route_table_tags +WHERE region = '{{ region }}' +GROUP BY route_table_id +HAVING json_extract(tags,
'$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +AND json_extract(tags, '$.Name') = '{{ route_table_name }}' +) t + +/*+ exports, retries=3, retry_delay=5 */ +SELECT route_table_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.route_table_tags +WHERE region = '{{ region }}' +GROUP BY route_table_id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +AND json_extract(tags, '$.Name') = '{{ route_table_name }}' + +/*+ delete */ +DELETE FROM aws.ec2.route_tables +WHERE data__Identifier = '{{ route_table_id }}' +AND region = '{{ region }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql index f307613..cc03c2a 100644 --- a/examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/tag_main_vpc_route_table.iql @@ -4,4 +4,4 @@ set data__PatchDocument = string('{{ { "Tags": tags } | generate_patch_document }}') WHERE region = '{{ region }}' -AND data__Identifier = '{{ route_table_id }}'; \ No newline at end of file +AND data__Identifier = '{{ main_route_table_id }}'; \ No newline at end of file diff --git a/examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc_endpoint.iql b/examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc_endpoint.iql new file mode 100644 index 0000000..d40f522 --- /dev/null +++ b/examples/databricks/all-purpose-cluster/resources/aws/vpc/vpc_endpoint.iql @@ -0,0 +1,60 @@ +/*+ exists */ +SELECT COUNT(*) as count FROM +( + SELECT id, + json_group_object(tag_key, tag_value) as tags + FROM aws.ec2.vpc_endpoint_tags + WHERE region = '{{ region }}' + AND service_name = '{{ service_name }}' + GROUP BY id + HAVING json_extract(tags, '$.Provisioner') = 'stackql' + AND json_extract(tags, '$.StackName') = '{{ stack_name }}' + AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t; + +/*+ create */ +INSERT INTO aws.ec2.vpc_endpoints ( + ServiceName, + VpcEndpointType, + VpcId, + RouteTableIds, + Tags, + region +) +SELECT + '{{ service_name }}', + '{{ vpc_endpoint_type }}', + '{{ vpc_id }}', + '{{ route_table_ids }}', + '{{ tags }}', + '{{ region }}'; + +/*+ statecheck, retries=5, retry_delay=5 */ +SELECT COUNT(*) as count FROM +( + SELECT id, + json_group_object(tag_key, tag_value) as tags + FROM aws.ec2.vpc_endpoint_tags + WHERE region = '{{ region }}' + AND service_name = '{{ service_name }}' + GROUP BY id + HAVING json_extract(tags, '$.Provisioner') = 'stackql' + AND json_extract(tags, '$.StackName') = '{{ stack_name }}' + AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}' +) t; + +/*+ exports, retries=3, retry_delay=5 */ +SELECT id as s3_gateway_endpoint_id, +json_group_object(tag_key, tag_value) as tags +FROM aws.ec2.vpc_endpoint_tags +WHERE region = '{{ region }}' +AND service_name = '{{ service_name }}' +GROUP BY id +HAVING json_extract(tags, '$.Provisioner') = 'stackql' +AND json_extract(tags, '$.StackName') = '{{ stack_name }}' +AND json_extract(tags, '$.StackEnv') = '{{ stack_env }}'; + +/*+ delete */ +DELETE FROM aws.ec2.vpc_endpoints +WHERE data__Identifier = '{{ s3_gateway_endpoint_id }}' +AND region = '{{ region }}'; \ No newline at end of file diff --git
a/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql index 8ba84f7..9da2dea 100644 --- a/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql +++ b/examples/databricks/all-purpose-cluster/resources/databricks_account/workspace.iql @@ -8,7 +8,6 @@ AND workspace_name = '{{ workspace_name }}' INSERT INTO databricks_account.provisioning.workspaces ( account_id, data__workspace_name, -data__network_id, data__aws_region, data__credentials_id, data__storage_configuration_id, @@ -17,7 +16,6 @@ data__pricing_tier SELECT '{{ databricks_account_id }}', '{{ workspace_name }}', -'{{ network_id }}', '{{ aws_region }}', '{{ credentials_id }}', '{{ storage_configuration_id }}', @@ -28,14 +26,14 @@ SELECT COUNT(*) as count FROM databricks_account.provisioning.workspaces WHERE account_id = '{{ databricks_account_id }}' AND workspace_name = '{{ workspace_name }}' -AND network_id = '{{ network_id }}' AND aws_region = '{{ aws_region }}' AND credentials_id = '{{ credentials_id }}' AND storage_configuration_id = '{{ storage_configuration_id }}' AND pricing_tier = '{{ pricing_tier }}' /*+ exports */ -SELECT workspace_id AS databricks_workspace_id +SELECT workspace_id AS databricks_workspace_id, +deployment_name AS databricks_deployment_name FROM databricks_account.provisioning.workspaces WHERE account_id = '{{ databricks_account_id }}' AND workspace_name = '{{ workspace_name }}' diff --git a/examples/databricks/all-purpose-cluster/stackql_manifest.yml b/examples/databricks/all-purpose-cluster/stackql_manifest.yml index 1e6b99d..7a6a4bd 100644 --- a/examples/databricks/all-purpose-cluster/stackql_manifest.yml +++ b/examples/databricks/all-purpose-cluster/stackql_manifest.yml @@ -31,6 +31,7 @@ resources: # AWS IAM # ==================================================================================== - name: aws/iam/cross_account_role + file: aws/iam/iam_role.iql props: - name: role_name value: "{{ stack_name }}-{{ stack_env }}-role" @@ -136,7 +137,7 @@ resources: Version: '2012-10-17' PolicyName: "{{ stack_name }}-{{ stack_env }}-policy" exports: - - aws_iam_cross_account_role_arn + - aws_iam_role_arn: aws_iam_cross_account_role_arn - name: databricks_account/credentials props: - name: credentials_name @@ -148,6 +149,127 @@ resources: exports: - databricks_credentials_id - databricks_role_external_id + - name: aws/iam/databricks_compute_role + file: aws/iam/iam_role.iql + props: + - name: role_name + value: "{{ stack_name }}-{{ stack_env }}-compute-role" + - name: assume_role_policy_document + value: + Version: "2012-10-17" + Statement: + - Action: "sts:AssumeRole" + Effect: "Allow" + Principal: + AWS: "{{ 'arn:aws:iam::314146311478:root' if trustInternalAccount == 'true' else 'arn:aws:iam::414351767826:root' }}" + Condition: + StringEquals: + sts:ExternalId: "{{ databricks_account_id }}" + - name: description + value: 'allows Databricks to access compute resources in ({{ stack_name }}-{{ stack_env }})' + - name: path + value: '/' + - name: policies + value: + - PolicyName: "Base" + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: "CreateEC2ResourcesWithRequestTag" + Effect: "Allow" + Action: + - "ec2:CreateFleet" + - "ec2:CreateLaunchTemplate" + - "ec2:CreateVolume" + - "ec2:RequestSpotInstances" + - "ec2:RunInstances" + Resource: ["*"] + Condition: + StringEquals: + aws:RequestTag/Vendor: "Databricks" + - Sid: "AllowDatabricksTagOnCreate" + Effect: "Allow" 
+ Action: ["ec2:CreateTags"] + Resource: ["*"] + Condition: + StringEquals: + ec2:CreateAction: + - "CreateFleet" + - "CreateLaunchTemplate" + - "CreateVolume" + - "RequestSpotInstances" + - "RunInstances" + - Sid: "UpdateByResourceTags" + Effect: "Allow" + Action: + - "ec2:AssignPrivateIpAddresses" + - "ec2:AssociateIamInstanceProfile" + - "ec2:AttachVolume" + - "ec2:AuthorizeSecurityGroupEgress" + - "ec2:AuthorizeSecurityGroupIngress" + - "ec2:CancelSpotInstanceRequests" + - "ec2:CreateFleet" + - "ec2:CreateLaunchTemplate" + - "ec2:CreateLaunchTemplateVersion" + - "ec2:CreateVolume" + - "ec2:DetachVolume" + - "ec2:DisassociateIamInstanceProfile" + - "ec2:ModifyFleet" + - "ec2:ModifyLaunchTemplate" + - "ec2:RequestSpotInstances" + - "ec2:RevokeSecurityGroupEgress" + - "ec2:RevokeSecurityGroupIngress" + - "ec2:RunInstances" + Resource: ["*"] + Condition: + StringEquals: + ec2:ResourceTag/Vendor: "Databricks" + - Sid: "GetByResourceTags" + Effect: "Allow" + Action: ["ec2:GetLaunchTemplateData"] + Resource: ["*"] + Condition: + StringEquals: + ec2:ResourceTag/Vendor: "Databricks" + - Sid: "DeleteByResourceTags" + Effect: "Allow" + Action: + - "ec2:DeleteFleets" + - "ec2:DeleteLaunchTemplate" + - "ec2:DeleteLaunchTemplateVersions" + - "ec2:DeleteTags" + - "ec2:DeleteVolume" + - "ec2:TerminateInstances" + Resource: ["*"] + Condition: + StringEquals: + ec2:ResourceTag/Vendor: "Databricks" + - Sid: "DescribeResources" + Effect: "Allow" + Action: + - "ec2:DescribeAvailabilityZones" + - "ec2:DescribeFleets" + - "ec2:DescribeIamInstanceProfileAssociations" + - "ec2:DescribeInstances" + - "ec2:DescribeInstanceStatus" + - "ec2:DescribeInternetGateways" + - "ec2:DescribeLaunchTemplates" + - "ec2:DescribeLaunchTemplateVersions" + - "ec2:DescribeNatGateways" + - "ec2:DescribeNetworkAcls" + - "ec2:DescribePrefixLists" + - "ec2:DescribeReservedInstancesOfferings" + - "ec2:DescribeRouteTables" + - "ec2:DescribeSecurityGroups" + - "ec2:DescribeSpotInstanceRequests" + - "ec2:DescribeSpotPriceHistory" + - "ec2:DescribeSubnets" + - "ec2:DescribeVolumes" + - "ec2:DescribeVpcs" + - "ec2:GetSpotPlacementScores" + Resource: ["*"] + exports: + - aws_iam_role_arn: databricks_compute_role_arn # ==================================================================================== # AWS VPC Networking # ==================================================================================== @@ -156,7 +278,7 @@ resources: - name: cidr_block values: prd: - value: "10.0.0.0/16" + value: "10.53.0.0/16" sit: value: "10.1.0.0/16" dev: @@ -171,27 +293,36 @@ resources: value: 019447a0-b84a-7b7f-bca5-2ee320207e51 exports: - vpc_id - - name: aws/vpc/get_main_route_table_id - type: query - exports: - - route_table_id - - name: aws/vpc/tag_main_vpc_route_table - type: command + - name: aws/vpc/nat_subnet + file: aws/vpc/subnet.iql props: + - name: availability_zone + value: "us-east-1a" + - name: cidr_block + values: + prd: + value: "10.53.0.0/24" + sit: + value: "10.1.0.0/19" + dev: + value: "10.2.0.0/19" - name: tags value: - Key: Name - Value: "{{ stack_name }}-{{ stack_env }}-route-table" - merge: ['global_tags'] - - name: aws/vpc/subnet1 + Value: "{{ stack_name }}-{{ stack_env }}-nat-subnet" + merge: + - global_tags + exports: + - subnet_id: nat_subnet_id + - name: aws/vpc/cluster_subnet1 file: aws/vpc/subnet.iql props: - name: availability_zone - value: "us-east-1a" + value: "us-east-1b" - name: cidr_block values: prd: - value: "10.0.0.0/19" + value: "10.53.160.0/19" sit: value: "10.1.0.0/19" dev: @@ -203,16 +334,16 @@ resources: 
merge: - global_tags exports: - - subnet_id: aws_vpc_subnet1_id - - name: aws/vpc/subnet2 + - subnet_id: cluster_subnet1_id + - name: aws/vpc/cluster_subnet2 file: aws/vpc/subnet.iql props: - name: availability_zone - value: "us-east-1b" + value: "us-east-1c" - name: cidr_block values: prd: - value: "10.0.32.0/19" + value: "10.53.192.0/19" sit: value: "10.1.32.0/19" dev: @@ -224,7 +355,7 @@ resources: merge: - global_tags exports: - - subnet_id: aws_vpc_subnet2_id + - subnet_id: cluster_subnet2_id - name: aws/vpc/inet_gateway props: - name: tags @@ -238,28 +369,70 @@ resources: - internet_gateway_id - name: aws/vpc/inet_gw_attachment props: [] + - name: aws/vpc/nat_route_table + file: aws/vpc/route_table.iql + props: + - name: route_table_name + value: "{{ stack_name }}-{{ stack_env }}-nat-route-table" + - name: tags + value: + - Key: Name + Value: "{{ stack_name }}-{{ stack_env }}-nat-route-table" + merge: ['global_tags'] + exports: + - route_table_id: nat_route_table_id + - name: aws/vpc/nat_route_to_inet + file: aws/vpc/inet_route.iql + props: + - name: route_table_id + value: "{{ nat_route_table_id }}" + exports: + - inet_route_indentifer: nat_inet_route_indentifer + - name: aws/vpc/nat_subnet_rt_assn + file: aws/vpc/subnet_rt_assn.iql + props: + - name: subnet_id + value: "{{ nat_subnet_id }}" + - name: route_table_id + value: "{{ nat_route_table_id }}" + - name: idempotency_token + value: 3eaf3040-1c8e-41a6-8be6-512ccaf5ff4e + exports: + - route_table_assn_id: nat_subnet_rt_assn_id + - name: aws/vpc/private_route_table + file: aws/vpc/route_table.iql + props: + - name: route_table_name + value: "{{ stack_name }}-{{ stack_env }}-private-route-table" + - name: tags + value: + - Key: Name + Value: "{{ stack_name }}-{{ stack_env }}-private-route-table" + merge: ['global_tags'] + exports: + - route_table_id: private_route_table_id - name: aws/vpc/subnet_rt_assn1 file: aws/vpc/subnet_rt_assn.iql props: + - name: route_table_id + value: "{{ private_route_table_id }}" - name: subnet_id - value: "{{ aws_vpc_subnet1_id }}" + value: "{{ cluster_subnet1_id }}" - name: idempotency_token value: 019447aa-1c7a-775b-91dc-04db7c49f4a7 exports: - - route_table_assn_id: aws_vpc_subnet1_rt_assn_id + - route_table_assn_id: cluster_subnet1_rt_assn_id - name: aws/vpc/subnet_rt_assn2 file: aws/vpc/subnet_rt_assn.iql props: + - name: route_table_id + value: "{{ private_route_table_id }}" - name: subnet_id - value: "{{ aws_vpc_subnet2_id }}" + value: "{{ cluster_subnet2_id }}" - name: idempotency_token - value: 019447ab-1302-754a-a580-99071f1ad814 - exports: - - route_table_assn_id: aws_vpc_subnet2_rt_assn_id - - name: aws/vpc/inet_route - props: [] + value: c19c9077-c25d-46a4-a299-7bd93d773e58 exports: - - inet_route_indentifer + - route_table_assn_id: cluster_subnet2_rt_assn_id - name: aws/vpc/elastic_ip props: - name: tags @@ -283,6 +456,14 @@ resources: value: 019447a5-f076-75f8-9173-092df5a66d35 exports: - nat_gateway_id + - name: aws/vpc/nat_inet_route + props: + - name: route_table_id + value: "{{ private_route_table_id }}" + - name: nat_gateway_id + value: "{{ nat_gateway_id }}" + exports: + - nat_inet_route_indentifer - name: aws/vpc/security_group props: - name: group_name @@ -300,15 +481,15 @@ resources: props: - name: security_group_ingress value: - - FromPort: 1025 + - FromPort: 0 ToPort: 65535 SourceSecurityGroupOwnerId: "{{ aws_account }}" IpProtocol: tcp SourceSecurityGroupId: "{{ security_group_id }}" - - FromPort: 1025 + - FromPort: 0 ToPort: 65535 SourceSecurityGroupOwnerId: "{{ aws_account 
}}" - IpProtocol: udp + IpProtocol: "udp" SourceSecurityGroupId: "{{ security_group_id }}" - CidrIp: "3.237.73.224/28" FromPort: 443 @@ -320,19 +501,39 @@ resources: IpProtocol: "tcp" - name: security_group_egress value: + - FromPort: 0 + ToPort: 65535 + IpProtocol: "tcp" + DestinationSecurityGroupId: "{{ security_group_id }}" + Description: "Allow all TCP outbound access to the same security group" - CidrIp: "0.0.0.0/0" Description: Allow all outbound traffic FromPort: -1 ToPort: -1 IpProtocol: "-1" + - CidrIp: "0.0.0.0/0" + FromPort: 3306 + ToPort: 3306 + IpProtocol: "tcp" + Description: "Allow accessing the Databricks metastore" + - FromPort: 0 + ToPort: 65535 + IpProtocol: "udp" + DestinationSecurityGroupId: "{{ security_group_id }}" + Description: "Allow all UDP outbound access to the same security group" + - CidrIp: "0.0.0.0/0" + FromPort: 443 + ToPort: 443 + IpProtocol: "tcp" + Description: "Allow accessing Databricks infrastructure, cloud data sources, and library repositories" - name: databricks_account/network props: - name: databricks_network_name value: "{{ stack_name }}-{{ stack_env }}-network" - name: subnet_ids value: - - "{{ aws_vpc_subnet1_id }}" - - "{{ aws_vpc_subnet2_id }}" + - "{{ cluster_subnet1_id }}" + - "{{ cluster_subnet2_id }}" - name: security_group_ids value: - "{{ security_group_id }}" @@ -387,6 +588,23 @@ resources: Resource: - "{{ aws_s3_workspace_bucket_arn }}/*" - "{{ aws_s3_workspace_bucket_arn }}" + - name: aws/vpc/vpc_endpoint + props: + - name: service_name + value: "com.amazonaws.{{ region }}.s3" + - name: vpc_endpoint_type + value: "Gateway" + - name: route_table_ids + value: + - "{{ private_route_table_id }}" + - name: tags + value: + - Key: Name + Value: "{{ stack_name }}-{{ stack_env }}-s3-vpc-endpoint" + merge: + - global_tags + exports: + - s3_gateway_endpoint_id - name: databricks_account/storage_configuration props: - name: storage_configuration_name @@ -414,7 +632,8 @@ resources: - name: pricing_tier value: PREMIUM exports: - - databricks_workspace_id + - databricks_workspace_id + - databricks_deployment_name - name: databricks_account/workspace_group props: - name: display_name @@ -468,11 +687,3 @@ resources: exports: - databricks_cluster_id - databricks_cluster_state - - # "spark_conf": { - # "spark.databricks.cluster.profile": "singleNode", - # "spark.master": "local[*, 4]" - # }, - # "spark_env_vars": { - # "PYSPARK_PYTHON": "/databricks/python3/bin/python3" - # }, \ No newline at end of file