diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index ec22274fe..39c6202b9 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -39,6 +39,8 @@ from feathr.utils.feature_printer import FeaturePrinter from feathr.utils.spark_job_params import FeatureGenerationJobParams, FeatureJoinJobParams from feathr.version import get_version +import importlib.util + class FeathrClient(object): @@ -71,7 +73,7 @@ def __init__( config_path (optional): Config yaml file path. See [Feathr Config Template](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) for more details. Defaults to "./feathr_config.yaml". local_workspace_dir (optional): Set where is the local work space dir. If not set, Feathr will create a temporary folder to store local workspace related files. credential (optional): Azure credential to access cloud resources, most likely to be the returned result of DefaultAzureCredential(). If not set, Feathr will initialize DefaultAzureCredential() inside the __init__ function to get credentials. - project_registry_tag (optional): Adding tags for project in Feathr registry. This might be useful if you want to tag your project as deprecated, or allow certain customizations on project leve. Default is empty + project_registry_tag (optional): Adding tags for project in Feathr registry. This might be useful if you want to tag your project as deprecated, or allow certain customizations on project level. Default is empty use_env_vars (optional): Whether to use environment variables to set up the client. If set to False, the client will not use environment variables to set up the client. Defaults to True. """ self.logger = logging.getLogger(__name__) @@ -94,13 +96,19 @@ def __init__( self.project_name = self.env_config.get( 'project_config__project_name') - # Redis configs - self.redis_host = self.env_config.get( - 'online_store__redis__host') - self.redis_port = self.env_config.get( - 'online_store__redis__port') - self.redis_ssl_enabled = self.env_config.get( - 'online_store__redis__ssl_enabled') + # Redis configs. This is optional unless users have configured Redis host. + if self.env_config.get('online_store__redis__host'): + # For illustrative purposes. + spec = importlib.util.find_spec("redis") + if spec is None: + self.logger.warning('You have configured Redis host, but there is no local Redis client package. Install the package using "pip install redis". ') + self.redis_host = self.env_config.get( + 'online_store__redis__host') + self.redis_port = self.env_config.get( + 'online_store__redis__port') + self.redis_ssl_enabled = self.env_config.get( + 'online_store__redis__ssl_enabled') + self._construct_redis_client() # Offline store enabled configs; false by default self.s3_enabled = self.env_config.get( @@ -182,7 +190,6 @@ def __init__( master = self.env_config.get('spark_config__local__master') ) - self._construct_redis_client() self.secret_names = [] @@ -459,15 +466,16 @@ def _construct_redis_key(self, feature_table, key): key = self._COMPOSITE_KEY_SEPARATOR.join(key) return feature_table + self._KEY_SEPARATOR + key - def _str_to_bool(self, s): + def _str_to_bool(self, s: str, variable_name = None): """Define a function to detect convert string to bool, since Redis client sometimes require a bool and sometimes require a str """ - if s == 'True' or s == True: + if s.casefold() == 'True'.casefold() or s == True: return True - elif s == 'False' or s == False: + elif s.casefold() == 'False'.casefold() or s == False: return False else: - raise ValueError # evil ValueError that doesn't tell you what the wrong value was + self.logger.warning(f'{s} is not a valid Bool value. Maybe you want to double check if it is set correctly for {variable_name}.') + return s def _construct_redis_client(self): """Constructs the Redis client. The host, port, credential and other parameters can be set via environment @@ -477,13 +485,12 @@ def _construct_redis_client(self): host = self.redis_host port = self.redis_port ssl_enabled = self.redis_ssl_enabled - redis_client = redis.Redis( + self.redis_client = redis.Redis( host=host, port=port, password=password, - ssl=self._str_to_bool(ssl_enabled)) + ssl=self._str_to_bool(ssl_enabled, "ssl_enabled")) self.logger.info('Redis connection is successful and completed.') - self.redis_client = redis_client def get_offline_features(self, diff --git a/feathr_project/feathr/utils/_env_config_reader.py b/feathr_project/feathr/utils/_env_config_reader.py index 25f51be04..fd4555433 100644 --- a/feathr_project/feathr/utils/_env_config_reader.py +++ b/feathr_project/feathr/utils/_env_config_reader.py @@ -62,7 +62,7 @@ def get(self, key: str, default: str = None) -> str: if res is not None: return res - logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store.") + logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Using default value which is {default}.") return default @@ -80,12 +80,20 @@ def get_from_env_or_akv(self, key: str) -> str: Returns: Feathr client's config value. """ - conf_var = ( - self._get_variable_from_env(key) or - (self._get_variable_from_akv(key) if self.akv_name else None) - ) + res_env = (self._get_variable_from_env(key) if self.use_env_vars else None) + res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None) + + # rewrite the logic below to make sure: + # First we have the order (i.e. res1 > res2 > res3 > default) + # Also previously we use OR for the result, which will yield a bug where say res1=None, res2=False, res3=None. Using OR will result to None result, although res2 actually have value + for res in [res_env, res_keyvault]: + if res is not None: + return res + + logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.") + return None + - return conf_var def _get_variable_from_env(self, key: str) -> str: # make it work for lower case and upper case. diff --git a/feathr_project/setup.py b/feathr_project/setup.py index 2e741860e..d5f13b31c 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -113,4 +113,4 @@ "Operating System :: OS Independent", ], python_requires=">=3.7" -) +) \ No newline at end of file