From c1bf09e6294ee1123e26d89ad953e9cdd10dd54c Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Wed, 18 Jan 2023 03:26:34 -0800 Subject: [PATCH 1/2] Put numpy pinning back Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- feathr_project/setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/feathr_project/setup.py b/feathr_project/setup.py index d5f13b31c..78247a391 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -65,6 +65,9 @@ "py4j<=0.10.9.7", "loguru<=0.6.0", "pandas", + # pin numpy due to pyspark's deprecated np.bool access. + # Specifically, to resolve `AttributeError: module 'numpy' has no attribute 'bool'` when calling sparkDF.toPandas() + "numpy<=1.20.3", "redis<=4.4.0", "requests<=2.28.1", "tqdm<=4.64.1", From 0dbf1f1df2ae1a4a4d0302c2b720ffbf096f83d1 Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Thu, 19 Jan 2023 20:01:36 +0000 Subject: [PATCH 2/2] Remove pinning numpy and relevant codes causing pyspark error from the notebooks Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- .../databricks/databricks_quickstart_nyc_taxi_demo.ipynb | 4 ++-- docs/samples/nyc_taxi_demo.ipynb | 8 ++++---- feathr_project/setup.py | 5 +---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/docs/samples/databricks/databricks_quickstart_nyc_taxi_demo.ipynb b/docs/samples/databricks/databricks_quickstart_nyc_taxi_demo.ipynb index 07905d591..7b47e96ad 100644 --- a/docs/samples/databricks/databricks_quickstart_nyc_taxi_demo.ipynb +++ b/docs/samples/databricks/databricks_quickstart_nyc_taxi_demo.ipynb @@ -350,7 +350,7 @@ "\n", "# Download the data file\n", "df_raw = nyc_taxi.get_spark_df(spark=spark, local_cache_path=DATA_FILE_PATH)\n", - "df_raw.limit(5).toPandas()" + "df_raw.limit(5).show()" ] }, { @@ -1203,7 +1203,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.10.9 (main, Jan 11 2023, 15:21:40) [GCC 11.2.0]" }, "vscode": { "interpreter": { diff --git a/docs/samples/nyc_taxi_demo.ipynb b/docs/samples/nyc_taxi_demo.ipynb index c9eb11bba..9ded33149 100644 --- a/docs/samples/nyc_taxi_demo.ipynb +++ b/docs/samples/nyc_taxi_demo.ipynb @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -417,7 +417,7 @@ "# Download the data file\n", "data_file_path = f\"{WORKING_DIR}/nyc_taxi_data.csv\"\n", "df_raw = nyc_taxi.get_spark_df(spark=spark, local_cache_path=data_file_path)\n", - "df_raw.limit(5).toPandas()" + "df_raw.limit(5).show()" ] }, { @@ -790,7 +790,7 @@ " client=client,\n", " data_format=DATA_FORMAT,\n", ")\n", - "df.select(feature_names).limit(5).toPandas()" + "df.select(feature_names).limit(5).show()" ] }, { @@ -1154,7 +1154,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]" + "version": "3.10.9" }, "vscode": { "interpreter": { diff --git a/feathr_project/setup.py b/feathr_project/setup.py index 78247a391..699f6d688 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -65,9 +65,6 @@ "py4j<=0.10.9.7", "loguru<=0.6.0", "pandas", - # pin numpy due to pyspark's deprecated np.bool access. - # Specifically, to resolve `AttributeError: module 'numpy' has no attribute 'bool'` when calling sparkDF.toPandas() - "numpy<=1.20.3", "redis<=4.4.0", "requests<=2.28.1", "tqdm<=4.64.1", @@ -77,7 +74,7 @@ "pyyaml<=6.0", "Jinja2<=3.1.2", "pyarrow<=9.0.0", - "pyspark>=3.1.2", + "pyspark>=3.1.2", # TODO upgrade the version once pyspark publishes new release to resolve `AttributeError: module 'numpy' has no attribute 'bool'` "python-snappy<=0.6.1", "deltalake>=0.6.2", "graphlib_backport<=1.0.3",