From 89c9bfd1a514003c5710f0194c4d231cdd25851f Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 9 Sep 2022 15:11:57 -0700 Subject: [PATCH 01/10] Adding DevSkim linter to Github actions --- .github/workflows/devskim-security-linter.yml | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/devskim-security-linter.yml diff --git a/.github/workflows/devskim-security-linter.yml b/.github/workflows/devskim-security-linter.yml new file mode 100644 index 000000000..d52fa57a8 --- /dev/null +++ b/.github/workflows/devskim-security-linter.yml @@ -0,0 +1,35 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party (Microsoft) and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# For more details about Devskim, visit https://github.com/marketplace/actions/devskim + +name: DevSkim + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '25 4 * * 2' + +jobs: + lint: + name: DevSkim + runs-on: ubuntu-20.04 + permissions: + actions: read + contents: read + security-events: write + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Run DevSkim scanner + uses: microsoft/DevSkim-Action@v1 + + - name: Upload DevSkim scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: devskim-results.sarif From a9c2343d4cb9e06b942766d30689cf50904e5727 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 14 Sep 2022 02:04:27 -0700 Subject: [PATCH 02/10] Fix in ARM template to pull latest tagged release image from dockerhub --- docs/dev_guide/feathr_overall_release_guide.md | 5 +++-- docs/how-to-guides/azure_resource_provision.json | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/dev_guide/feathr_overall_release_guide.md b/docs/dev_guide/feathr_overall_release_guide.md index 015e48f6f..983bf9249 100644 --- 
a/docs/dev_guide/feathr_overall_release_guide.md +++ b/docs/dev_guide/feathr_overall_release_guide.md @@ -33,8 +33,9 @@ Before the release is made, the version needs to be updated in following places - [build.sbt](https://github.com/linkedin/feathr/blob/main/build.sbt#L3) - For Maven release version - [setup.py](https://github.com/linkedin/feathr/blob/main/feathr_project/setup.py#L10) - For PyPi release version - [conf.py](https://github.com/linkedin/feathr/blob/main/feathr_project/docs/conf.py#L27) - For documentation version -- [feathr_config.yaml](https://github.com/linkedin/feathr/blob/main/feathr_project/test/test_user_workspace/feathr_config.yaml#L84) - To set the spark runtime location for Azure Synapse and Azure Databricks used by test suite. -- [constants.py](https://github.com/linkedin/feathr/blob/73656fe4a57219e99ff6fede10d51a000ae90fa1/feathr_project/feathr/constants.py#L31) - To set the default maven artifact version +- [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/test/test_user_workspace/feathr_config.yaml#L84) - To set the spark runtime location for Azure Synapse and Azure Databricks used by test suite. +- [constants.py](https://github.com/feathr-ai/feathr/blob/73656fe4a57219e99ff6fede10d51a000ae90fa1/feathr_project/feathr/constants.py#L31) - To set the default maven artifact version +- [azure_resource_provision.json](https://github.com/feathr-ai/feathr/blob/main/docs/how-to-guides/azure_resource_provision.json#L114) - To set the deployment template to pull the latest release image. ## Triggering automated release pipelines Our goal is to automate the release process as much as possible. 
So far, we have automated the following steps diff --git a/docs/how-to-guides/azure_resource_provision.json b/docs/how-to-guides/azure_resource_provision.json index 827757b8c..1f8193e61 100644 --- a/docs/how-to-guides/azure_resource_provision.json +++ b/docs/how-to-guides/azure_resource_provision.json @@ -111,7 +111,7 @@ "destinationBacpacBlobUrl": "[concat('https://',variables('dlsName'),'.blob.core.windows.net/',variables('dlsFsName'),'/',variables('bacpacBlobName'))]", "bacpacDeploymentScriptName": "CopyBacpacFile", "bacpacDbExtensionName": "registryRbacDbImport", - "preBuiltdockerImage": "feathrfeaturestore/feathr-registry" + "preBuiltdockerImage": "feathrfeaturestore/feathr-registry:releases-v0.7.2" }, "functions": [], "resources": [ From cd056a9c25947103adbebe4a1aec8f57da81d259 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 14 Sep 2022 02:06:25 -0700 Subject: [PATCH 03/10] Removing dev skim file from this branch --- .github/workflows/devskim-security-linter.yml | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 .github/workflows/devskim-security-linter.yml diff --git a/.github/workflows/devskim-security-linter.yml b/.github/workflows/devskim-security-linter.yml deleted file mode 100644 index d52fa57a8..000000000 --- a/.github/workflows/devskim-security-linter.yml +++ /dev/null @@ -1,35 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party (Microsoft) and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
-# For more details about Devskim, visit https://github.com/marketplace/actions/devskim - -name: DevSkim - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - schedule: - - cron: '25 4 * * 2' - -jobs: - lint: - name: DevSkim - runs-on: ubuntu-20.04 - permissions: - actions: read - contents: read - security-events: write - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Run DevSkim scanner - uses: microsoft/DevSkim-Action@v1 - - - name: Upload DevSkim scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: devskim-results.sarif From d7a41dab9b9ddf2e788423f4f4c456e70d34702e Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 14 Sep 2022 02:28:25 -0700 Subject: [PATCH 04/10] Fixing linkedin org reference --- docs/dev_guide/feathr_overall_release_guide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/dev_guide/feathr_overall_release_guide.md b/docs/dev_guide/feathr_overall_release_guide.md index 983bf9249..f781d82e1 100644 --- a/docs/dev_guide/feathr_overall_release_guide.md +++ b/docs/dev_guide/feathr_overall_release_guide.md @@ -30,9 +30,9 @@ Read through the [commit log](https://github.com/linkedin/feathr/commits/main) t ## Code Changes Before the release is made, the version needs to be updated in following places -- [build.sbt](https://github.com/linkedin/feathr/blob/main/build.sbt#L3) - For Maven release version -- [setup.py](https://github.com/linkedin/feathr/blob/main/feathr_project/setup.py#L10) - For PyPi release version -- [conf.py](https://github.com/linkedin/feathr/blob/main/feathr_project/docs/conf.py#L27) - For documentation version +- [build.sbt](https://github.com/feathr-ai/feathr/blob/main/build.sbt#L3) - For Maven release version +- [setup.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/setup.py#L10) - For PyPi release version +- 
[conf.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/docs/conf.py#L27) - For documentation version - [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/test/test_user_workspace/feathr_config.yaml#L84) - To set the spark runtime location for Azure Synapse and Azure Databricks used by test suite. - [constants.py](https://github.com/feathr-ai/feathr/blob/73656fe4a57219e99ff6fede10d51a000ae90fa1/feathr_project/feathr/constants.py#L31) - To set the default maven artifact version - [azure_resource_provision.json](https://github.com/feathr-ai/feathr/blob/main/docs/how-to-guides/azure_resource_provision.json#L114) - To set the deployment template to pull the latest release image. From dc9ecbd68f521323b381d18e263c3e8eae302664 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 20 Sep 2022 06:37:25 -0700 Subject: [PATCH 05/10] Removing the docker index url from dockerhub image name as it seems to cause problem with the update --- .github/workflows/publish-to-dockerhub.yml | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 .github/workflows/publish-to-dockerhub.yml diff --git a/.github/workflows/publish-to-dockerhub.yml b/.github/workflows/publish-to-dockerhub.yml new file mode 100644 index 000000000..d680d422b --- /dev/null +++ b/.github/workflows/publish-to-dockerhub.yml @@ -0,0 +1,80 @@ +# This workflow builds the docker container and publishes to dockerhub with appropriate tag +# It has two triggers, +# 1. daily i.e. runs everyday at specific time. +# 2. 
Anytime a new branch is created under releases + +name: Publish Feathr Docker image to DockerHub + +on: + workflow_dispatch: + schedule: + # Runs daily at 10 PM UTC, would generate nightly tag + - cron: '00 22 * * *' + + + push: + # For every push against the releases/** branch, usually would happen at release time, Tag example - releases/v0.7.0 + branches: + - 'releases/**' + + +jobs: + build_and_push_image_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: feathrfeaturestore/feathr-registry + + - name: Build and push Docker image + uses: docker/build-push-action@v3 + with: + context: . + file: FeathrRegistry.Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + # Deploy the docker container to the three test environments for feathr + deploy: + runs-on: ubuntu-latest + needs: build_and_push_image_to_registry + + + steps: + - name: Deploy to Feathr SQL Registry Azure Web App + id: deploy-to-sql-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: 'feathr-sql-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }} + images: 'feathrfeaturestore/feathr-registry:nightly' + + - name: Deploy to Feathr Purview Registry Azure Web App + id: deploy-to-purview-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: 'feathr-purview-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} + images: 'feathr-registry:nightly' + + - name: Deploy to Feathr RBAC Registry Azure Web App + id: deploy-to-rbac-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: 
'feathr-rbac-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} + images: 'feathrfeaturestore/feathr-registry:nightly' + From cb32fb47b12a01d198f0b0f6a272e756aa408e27 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 20 Sep 2022 06:46:02 -0700 Subject: [PATCH 06/10] Adding to the right file, had a dockerhub workflow file with different name --- .github/workflows/docker-publish.yml | 6 +- .github/workflows/publish-to-dockerhub.yml | 80 ---------------------- 2 files changed, 3 insertions(+), 83 deletions(-) delete mode 100644 .github/workflows/publish-to-dockerhub.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 6db0babf7..84e99b614 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -58,7 +58,7 @@ jobs: with: app-name: 'feathr-sql-registry' publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' + images: 'feathrfeaturestore/feathr-registry:nightly' - name: Deploy to Feathr Purview Registry Azure Web App id: deploy-to-purview-webapp @@ -66,7 +66,7 @@ jobs: with: app-name: 'feathr-purview-registry' publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' + images: 'feathrfeaturestore/feathr-registry:nightly' - name: Deploy to Feathr RBAC Registry Azure Web App id: deploy-to-rbac-webapp @@ -74,5 +74,5 @@ jobs: with: app-name: 'feathr-rbac-registry' publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' + images: 'feathrfeaturestore/feathr-registry:nightly' diff --git a/.github/workflows/publish-to-dockerhub.yml b/.github/workflows/publish-to-dockerhub.yml deleted file mode 100644 index d680d422b..000000000 --- 
a/.github/workflows/publish-to-dockerhub.yml +++ /dev/null @@ -1,80 +0,0 @@ -# This workflow builds the docker container and publishes to dockerhub with appropriate tag -# It has two triggers, -# 1. daily i.e. runs everyday at specific time. -# 2. Anytime a new branch is created under releases - -name: Publish Feathr Docker image to DockerHub - -on: - workflow_dispatch: - schedule: - # Runs daily at 10 PM UTC, would generate nightly tag - - cron: '00 22 * * *' - - - push: - # For every push against the releases/** branch, usually would happen at release time, Tag example - releases/v0.7.0 - branches: - - 'releases/**' - - -jobs: - build_and_push_image_to_registry: - name: Push Docker image to Docker Hub - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: feathrfeaturestore/feathr-registry - - - name: Build and push Docker image - uses: docker/build-push-action@v3 - with: - context: . 
- file: FeathrRegistry.Dockerfile - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - # Deploy the docker container to the three test environments for feathr - deploy: - runs-on: ubuntu-latest - needs: build_and_push_image_to_registry - - - steps: - - name: Deploy to Feathr SQL Registry Azure Web App - id: deploy-to-sql-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: 'feathr-sql-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }} - images: 'feathrfeaturestore/feathr-registry:nightly' - - - name: Deploy to Feathr Purview Registry Azure Web App - id: deploy-to-purview-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: 'feathr-purview-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} - images: 'feathr-registry:nightly' - - - name: Deploy to Feathr RBAC Registry Azure Web App - id: deploy-to-rbac-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: 'feathr-rbac-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} - images: 'feathrfeaturestore/feathr-registry:nightly' - From ceefd9f4b36e0d44e2e35c1ea0f34c8339ee8656 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Mon, 26 Sep 2022 12:20:01 -0700 Subject: [PATCH 07/10] Adding debug statements to test udf issue on Synapse --- feathr_project/feathr/udf/_preprocessing_pyudf_manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py index ca7114343..e5e408258 100644 --- a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py +++ b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py @@ -100,9 +100,13 @@ def persist_pyspark_udf_to_file(user_func, local_workspace_dir): however if we retrieve features from registry, the current implementation is to use plain strings to store the function body. 
In that case, the user_fuc will be string. """ if isinstance(user_func, str): + print("In If loop -> user function name is : " + user_func) udf_source_code = [user_func] else: + print("In else loop -> user function name is : " + user_func) udf_source_code = inspect.getsourcelines(user_func)[0] + print("UDF SOURCE -----> " + udf_source_code) + lines = [] # Some basic imports will be provided lines = lines + PROVIDED_IMPORTS From 12931bc5aab8ea8e90f08098222ee2eb2daae900 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 28 Sep 2022 05:33:14 -0700 Subject: [PATCH 08/10] Adding more print statements --- feathr_project/feathr/udf/_preprocessing_pyudf_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py index e5e408258..6a7c4f08d 100644 --- a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py +++ b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py @@ -50,6 +50,8 @@ def build_anchor_preprocessing_metadata(anchor_list: List[FeatureAnchor], local_ continue preprocessing_func = anchor.source.preprocessing if preprocessing_func: + print("Preprocessing function defined") + print(anchor.source.preprocessing.name) _PreprocessingPyudfManager.persist_pyspark_udf_to_file(preprocessing_func, local_workspace_dir) feature_names = [feature.name for feature in anchor.features] features_with_preprocessing = features_with_preprocessing + feature_names From 43d1dfd2094ede1b2b5fcec38b494314fb2ff286 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 18 Oct 2022 07:07:10 -0700 Subject: [PATCH 09/10] Pinning msrest version to work with pinned version of azure-core --- feathr_project/setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/feathr_project/setup.py b/feathr_project/setup.py index f36ba18b0..7ecd24ecb 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -51,6 +51,10 @@ # 
https://github.com/Azure/azure-sdk-for-python/pull/22891 # using a version lower than that to workaround this issue. "azure-core<=1.22.1", + # azure-core 1.22.1 depends on msrest==0.6.21. If an environment (AML) has a different version of azure-core (say 1.24.0), + # it brings a different version of msrest (0.7.0), which is incompatible with azure-core==1.22.1. Hence we need to pin msrest too. + # See this for more details: https://github.com/Azure/azure-sdk-for-python/issues/24765 + "msrest<=0.6.21", "typing_extensions>=4.2.0" ], tests_require=[ # TODO: This has been depricated From 9720232c031602f5c7c5315eecdd496b7f979f29 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 18 Oct 2022 21:34:00 -0700 Subject: [PATCH 10/10] Removing debug code from previous branch --- feathr_project/feathr/udf/_preprocessing_pyudf_manager.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py index b177a855e..55756ba3d 100644 --- a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py +++ b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py @@ -53,8 +53,6 @@ def build_anchor_preprocessing_metadata(anchor_list: List[FeatureAnchor], local_ continue preprocessing_func = anchor.source.preprocessing if preprocessing_func: - print("Preprocessing function defined") - print(anchor.source.preprocessing.name) _PreprocessingPyudfManager.persist_pyspark_udf_to_file(preprocessing_func, local_workspace_dir) feature_names = [feature.name for feature in anchor.features] features_with_preprocessing = features_with_preprocessing + feature_names @@ -111,13 +109,9 @@ def persist_pyspark_udf_to_file(user_func, local_workspace_dir): however if we retrieve features from registry, the current implementation is to use plain strings to store the function body. In that case, the user_fuc will be string.
""" if isinstance(user_func, str): - print("In If loop -> user function name is : " + user_func) udf_source_code = [user_func] else: - print("In else loop -> user function name is : " + user_func) udf_source_code = inspect.getsourcelines(user_func)[0] - print("UDF SOURCE -----> " + udf_source_code) - lines = [] # Some basic imports will be provided lines = lines + PROVIDED_IMPORTS