From 87ae587018beba356c4e959cf75df50ccaac4af7 Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Tue, 4 Oct 2022 23:43:41 +0000 Subject: [PATCH 1/3] Add version. Fix function string parsing Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- feathr_project/feathr/__init__.py | 5 +++ .../udf/_preprocessing_pyudf_manager.py | 41 +++++++++++-------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/feathr_project/feathr/__init__.py b/feathr_project/feathr/__init__.py index fae0cb60c..9b0cf0a49 100644 --- a/feathr_project/feathr/__init__.py +++ b/feathr_project/feathr/__init__.py @@ -1,3 +1,5 @@ +import pkg_resources + from .client import FeathrClient from .spark_provider.feathr_configurations import SparkExecutionConfiguration from .definition.feature_derivations import * @@ -74,3 +76,6 @@ 'FeaturePrinter', 'SparkExecutionConfiguration', ] + + +__version__ = pkg_resources.require("feathr")[0].version diff --git a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py index ca7114343..1b592b4e7 100644 --- a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py +++ b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py @@ -1,12 +1,15 @@ +import ast import inspect import os +import pickle from pathlib import Path from typing import List, Optional, Union -import pickle -from feathr.definition.anchor import FeatureAnchor + from jinja2 import Template + +from feathr.definition.anchor import FeatureAnchor from feathr.definition.source import HdfsSource -import ast + # Some metadata that are only needed by Feathr FEATHR_PYSPARK_METADATA = 'generated_feathr_pyspark_metadata' @@ -42,7 +45,7 @@ def build_anchor_preprocessing_metadata(anchor_list: List[FeatureAnchor], local_ # delete the file if it already exists to avoid caching previous results for f in [client_udf_repo_path, metadata_path, pyspark_driver_path]: if os.path.exists(f): - os.remove(f) + os.remove(f) for anchor in anchor_list: # only support batch source preprocessing for now. @@ -73,23 +76,25 @@ def build_anchor_preprocessing_metadata(anchor_list: List[FeatureAnchor], local_ with open(feathr_pyspark_metadata_abs_path, 'wb') as file: pickle.dump(features_with_preprocessing, file) + @staticmethod - def _parse_function_str_for_name(source: str) -> str: - """ - Use AST to parse the functions and get the name out. + def _parse_function_str_for_name(fn_str: str) -> str: + """Use AST to parse the function string and get the name out. + + Args: + fn_str: Function code in string. + + Returns: + Name of the function. """ - if source is None: + if not fn_str: return None - tree = ast.parse(source) + + tree = ast.parse(fn_str) if len(tree.body) != 1 or not isinstance(tree.body[0], ast.FunctionDef): - raise ValueError('provided code fragment is not a single function') - code = compile(source=tree, filename='custom.py',mode= 'exec') - # https://docs.python.org/3/library/inspect.html see the inspect module for more details - # tuple of names other than arguments and function locals. Assume there will be only one function, so will return the first as the name - for ele in code.co_consts: - # find the first object, that is the str, this will be the name of the function - if isinstance(ele, str): - return ele + raise ValueError("provided code fragment is not a single function") + + return tree.body[0].name @staticmethod @@ -174,7 +179,7 @@ def prepare_pyspark_udf_files(feature_names: List[str], local_workspace_dir): client_udf_repo_path = os.path.join(local_workspace_dir, FEATHR_CLIENT_UDF_FILE_NAME) # write pyspark_driver_template_abs_path and then client_udf_repo_path filenames = [pyspark_driver_template_abs_path, client_udf_repo_path] - + with open(pyspark_driver_path, 'w') as outfile: for fname in filenames: with open(fname) as infile: From 0fd617ae0bda81bed14b4b1fc22f8e94347d9fb8 Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Wed, 5 Oct 2022 09:34:46 +0000 Subject: [PATCH 2/3] Add unit test Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- .../unit/udf/test_preprocessing_pyudf_manager.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 feathr_project/test/unit/udf/test_preprocessing_pyudf_manager.py diff --git a/feathr_project/test/unit/udf/test_preprocessing_pyudf_manager.py b/feathr_project/test/unit/udf/test_preprocessing_pyudf_manager.py new file mode 100644 index 000000000..1daa87632 --- /dev/null +++ b/feathr_project/test/unit/udf/test_preprocessing_pyudf_manager.py @@ -0,0 +1,15 @@ +import pytest + +from feathr.udf._preprocessing_pyudf_manager import _PreprocessingPyudfManager + + +@pytest.mark.parametrize( + "fn_name, fn_str", + [ + ("fn_without_type_hint", "def fn_without_type_hint(a):\n return a + 10\n"), + ("fn_with_type_hint", "def fn_with_type_hint(a: int) -> int:\n return a + 10\n"), + ("fn_with_complex_type_hint", "def fn_with_complex_type_hint(a: Union[int, float]) -> Union[int, float]:\n return a + 10\n"), + ] +) +def test__parse_function_str_for_name(fn_name, fn_str): + assert fn_name == _PreprocessingPyudfManager._parse_function_str_for_name(fn_str) From 42b82153f3dacfb8c5f92f4924ae1e584ba96c2f Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Thu, 6 Oct 2022 04:07:36 +0000 Subject: [PATCH 3/3] Add comments Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- feathr_project/feathr/udf/_preprocessing_pyudf_manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py index 1b592b4e7..55756ba3d 100644 --- a/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py +++ b/feathr_project/feathr/udf/_preprocessing_pyudf_manager.py @@ -91,9 +91,13 @@ def _parse_function_str_for_name(fn_str: str) -> str: return None tree = ast.parse(fn_str) + + # tree.body contains a list of function definition objects parsed from the input string. + # Currently, we only accept a single function. if len(tree.body) != 1 or not isinstance(tree.body[0], ast.FunctionDef): raise ValueError("provided code fragment is not a single function") + # Get the function name from the function definition. return tree.body[0].name