Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pull_request_push_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
SPARK_CONFIG__SPARK_CLUSTER: databricks
SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"maven": {"coordinates": "com.azure.cosmos.spark:azure-cosmos-spark_3-1_2-12:4.16.0"}}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
Expand Down Expand Up @@ -311,4 +311,4 @@ jobs:
run: echo "NOW=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Notification
run: |
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
3 changes: 1 addition & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ dependencies {
implementation 'net.snowflake:snowflake-jdbc:3.13.18'
implementation 'net.snowflake:spark-snowflake_2.12:2.10.0-spark_3.2'
provided 'com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.21'
implementation 'com.azure.cosmos.spark:azure-cosmos-spark_3-1_2-12:4.16.0'
provided 'com.azure.cosmos.spark:azure-cosmos-spark_3-2_2-12:4.11.1'
provided 'com.microsoft.sqlserver:mssql-jdbc:10.2.0.jre8'
provided 'org.eclipse.jetty:jetty-util:9.3.24.v20180605'
provided 'org.apache.kafka:kafka-clients:3.1.0'
Expand Down Expand Up @@ -130,7 +130,6 @@ project.ext.spec = [
'avro' : "org.apache.avro:avro:1.10.2",
"avroUtil": "com.linkedin.avroutil1:helper-all:0.2.100",
"azure": "com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.21",
"spark_cosmos": "com.azure.cosmos.spark:azure-cosmos-spark_3-1_2-12:4.16.0",
'fastutil' : "it.unimi.dsi:fastutil:8.1.1",
'mvel' : "org.mvel:mvel2:2.2.8.Final",
'protobuf' : "com.google.protobuf:protobuf-java:2.6.1",
Expand Down
1 change: 0 additions & 1 deletion docs/samples/feature_embedding.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@
"client = FeathrClient(\n",
" config_path=config_path,\n",
" credential=credential,\n",
" use_env_vars=False,\n",
")"
]
},
Expand Down
2 changes: 0 additions & 2 deletions feathr-impl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ dependencies {
implementation spec.product.jackson.dataformat_hocon
implementation spec.product.jackson.jackson_core
implementation spec.product.spark_redis
implementation spec.product.spark_cosmos
implementation spec.product.fastutil
implementation spec.product.hadoop.mapreduce_client_core
implementation spec.product.mvel
Expand Down Expand Up @@ -78,7 +77,6 @@ dependencies {

testImplementation spec.product.equalsverifier
testImplementation spec.product.spark.spark_catalyst
testImplementation spec.product.spark_cosmos
testImplementation spec.product.mockito
testImplementation spec.product.scala.scalatest
testImplementation spec.product.testing
Expand Down
4 changes: 1 addition & 3 deletions feathr_project/feathr/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def __init__(
local_workspace_dir: str = None,
credential: Any = None,
project_registry_tag: Dict[str, str] = None,
use_env_vars: bool = True,
):
"""Initialize Feathr Client.

Expand All @@ -74,13 +73,12 @@ def __init__(
local_workspace_dir (optional): Set where is the local work space dir. If not set, Feathr will create a temporary folder to store local workspace related files.
credential (optional): Azure credential to access cloud resources, most likely to be the returned result of DefaultAzureCredential(). If not set, Feathr will initialize DefaultAzureCredential() inside the __init__ function to get credentials.
project_registry_tag (optional): Adding tags for project in Feathr registry. This might be useful if you want to tag your project as deprecated, or allow certain customizations on project level. Default is empty
use_env_vars (optional): Whether to use environment variables to set up the client. If set to False, the client will not use environment variables to set up the client. Defaults to True.
"""
self.logger = logging.getLogger(__name__)
# Redis key separator
self._KEY_SEPARATOR = ':'
self._COMPOSITE_KEY_SEPARATOR = '#'
self.env_config = EnvConfigReader(config_path=config_path, use_env_vars=use_env_vars)
self.env_config = EnvConfigReader(config_path=config_path)
if local_workspace_dir:
self.local_workspace_dir = local_workspace_dir
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def submit_feathr_job(
spark_args = self._init_args(job_name=job_name, confs=cfg)
# Add additional repositories
spark_args.extend(["--repositories", "https://repository.mulesoft.org/nexus/content/repositories/public/,https://linkedin.jfrog.io/artifactory/open-source/"])
# spark_args.extend(["--repositories", "https://linkedin.jfrog.io/artifactory/open-source/"])

if not main_jar_path:
# We don't have the main jar, use Maven
Expand Down
24 changes: 12 additions & 12 deletions feathr_project/feathr/utils/_env_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@ class EnvConfigReader(object):
"""A utility class to read Feathr environment variables either from os environment variables,
the config yaml file or Azure Key Vault.
If a key is set in the environment variable, ConfigReader will return the value of that environment variable
unless use_env_vars set to False.
It will retrieve the value in the following order:
- From the environment variable if the key is set in the os environment variables.
- From the config yaml file if the key exists.
- From the Azure Key Vault.
"""
akv_name: str = None # Azure Key Vault name to use for retrieving config values.
yaml_config: dict = None # YAML config file content.

def __init__(self, config_path: str, use_env_vars: bool = True):
def __init__(self, config_path: str):
"""Initialize the utility class.

Args:
config_path: Config file path.
use_env_vars (optional): Whether to use os environment variables instead of config file. Defaults to True.
"""
if config_path is not None:
config_path = Path(config_path)
Expand All @@ -31,15 +33,14 @@ def __init__(self, config_path: str, use_env_vars: bool = True):
except yaml.YAMLError as e:
logger.warning(e)

self.use_env_vars = use_env_vars

self.akv_name = self.get("secrets__azure_key_vault__name")
self.akv_client = AzureKeyVaultClient(self.akv_name) if self.akv_name else None

def get(self, key: str, default: str = None) -> str:
"""Gets the Feathr config variable for the given key.
It will retrieve the value in the following order:
- From the environment variable if `use_env_vars == True` and the key is set in the os environment variables.
- From the environment variable if the key is set in the os environment variables.
- From the config yaml file if the key exists.
- From the Azure Key Vault.
If the key is not found in any of the above, it will return `default`.
Expand All @@ -51,9 +52,9 @@ def get(self, key: str, default: str = None) -> str:
Returns:
Feathr client's config value.
"""
res_env = (self._get_variable_from_env(key) if self.use_env_vars else None)
res_file = (self._get_variable_from_file(key) if self.yaml_config else None)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)
res_env = self._get_variable_from_env(key)
res_file = (self._get_variable_from_file(key) if self.yaml_config and res_env is None else None)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name and res_env is None and res_file is None else None)

# rewrite the logic below to make sure:
# First we have the order (i.e. res1 > res2 > res3 > default)
Expand All @@ -67,8 +68,7 @@ def get(self, key: str, default: str = None) -> str:
return default

def get_from_env_or_akv(self, key: str) -> str:
"""Gets the Feathr config variable for the given key. This function ignores `use_env_vars` attribute and force to
look up environment variables or Azure Key Vault.
"""Gets the Feathr config variable for the given key. This function will look up environment variables or Azure Key Vault.
It will retrieve the value in the following order:
- From the environment variable if the key is set in the os environment variables.
- From the Azure Key Vault.
Expand All @@ -80,8 +80,8 @@ def get_from_env_or_akv(self, key: str) -> str:
Returns:
Feathr client's config value.
"""
res_env = (self._get_variable_from_env(key) if self.use_env_vars else None)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)
res_env = self._get_variable_from_env(key)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name and res_env is None else None)

# rewrite the logic below to make sure:
# First we have the order (i.e. res1 > res2 > res3 > default)
Expand Down
1 change: 1 addition & 0 deletions feathr_project/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"py4j<=0.10.9.7",
"loguru<=0.6.0",
"pandas",
"numpy<=1.20.3", # pin numpy due to pyspark's deprecated np.bool access
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this one get reverted?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, but let's first see whether all the tests pass and then revert this one; otherwise one of the tests might fail.

"redis<=4.4.0",
"requests<=2.28.1",
"tqdm<=4.64.1",
Expand Down
25 changes: 10 additions & 15 deletions feathr_project/test/unit/utils/test_env_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,51 +18,46 @@


@pytest.mark.parametrize(
"use_env_vars, env_value, expected_value",
"env_value, expected_value",
[
(True, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(True, None, TEST_CONFIG_FILE_VAL),
(False, TEST_CONFIG_ENV_VAL, TEST_CONFIG_FILE_VAL),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
( None, TEST_CONFIG_FILE_VAL),
]
)
def test__envvariableutil__get(
mocker: MockerFixture,
use_env_vars: bool,
env_value: str,
expected_value: str,
):
"""Test `get` method if it returns the correct value
along with `use_env_vars` argument.
"""
if env_value:
mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value})

f = NamedTemporaryFile(delete=True)
f.write(TEST_CONFIG_FILE_CONTENT.encode())
f.seek(0)
env_config = EnvConfigReader(config_path=f.name, use_env_vars=use_env_vars)
env_config = EnvConfigReader(config_path=f.name)
assert env_config.get(TEST_CONFIG_KEY) == expected_value


@pytest.mark.parametrize(
"use_env_vars, env_value, expected_value",
"env_value, expected_value",
[
(True, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(True, None, None),
(False, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(None, None),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
]
)
def test__envvariableutil__get_from_env_or_akv(
mocker: MockerFixture,
use_env_vars: bool,
env_value: str,
expected_value: str,
):
"""Test `get_from_env_or_akv` method if it returns the environment variable regardless of `use_env_vars` argument.
"""Test `get_from_env_or_akv` method if it returns the environment variable

Args:
mocker (MockerFixture): _description_
use_env_vars (bool): _description_
env_value (str): _description_
expected_value (str): _description_
"""
Expand All @@ -72,5 +67,5 @@ def test__envvariableutil__get_from_env_or_akv(
f = NamedTemporaryFile(delete=True)
f.write(TEST_CONFIG_FILE_CONTENT.encode())
f.seek(0)
env_config = EnvConfigReader(config_path=f.name, use_env_vars=use_env_vars)
env_config = EnvConfigReader(config_path=f.name)
assert env_config.get_from_env_or_akv(TEST_CONFIG_KEY) == expected_value