diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py index 430abc8ef0..360292dd80 100644 --- a/bigframes/_config/display_options.py +++ b/bigframes/_config/display_options.py @@ -26,8 +26,12 @@ class DisplayOptions: __doc__ = vendored_pandas_config.display_options_doc + # Options borrowed from pandas. max_columns: int = 20 - max_rows: int = 25 + max_rows: int = 10 + precision: int = 6 + + # Options unique to BigQuery DataFrames. progress_bar: Optional[str] = "auto" repr_mode: Literal["head", "deferred", "anywidget"] = "head" @@ -52,6 +56,8 @@ def pandas_repr(display_options: DisplayOptions): display_options.max_columns, "display.max_rows", display_options.max_rows, + "display.precision", + display_options.precision, "display.show_dimensions", True, ) as pandas_context: diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 4559d7cbb9..f9113e11c0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -23,6 +23,7 @@ import re import sys import textwrap +import traceback import typing from typing import ( Callable, @@ -814,7 +815,9 @@ def _repr_html_(self) -> str: except (AttributeError, ValueError, ImportError): # Fallback if anywidget is not available warnings.warn( - "Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode." + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to deferred mode. Error: {traceback.format_exc()}" ) return formatter.repr_query_job(self._compute_dry_run()) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 8bbb72c11a..5a20ddcb7f 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -23,6 +23,7 @@ import pandas as pd import bigframes +import bigframes.display.html # anywidget and traitlets are optional dependencies. We don't want the import of this # module to fail if they aren't installed, though. Instead, we try to limit the surface that @@ -201,12 +202,9 @@ def _set_table_html(self): page_data = cached_data.iloc[start:end] # Generate HTML table - self.table_html = page_data.to_html( - index=False, - max_rows=None, + self.table_html = bigframes.display.html.render_html( + dataframe=page_data, table_id=f"table-{self._table_id}", - classes="table table-striped table-hover", - escape=False, ) @traitlets.observe("page") diff --git a/bigframes/display/html.py b/bigframes/display/html.py new file mode 100644 index 0000000000..f1133789b4 --- /dev/null +++ b/bigframes/display/html.py @@ -0,0 +1,79 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""HTML rendering for DataFrames and other objects.""" + +from __future__ import annotations + +import html + +import pandas as pd +import pandas.api.types + +from bigframes._config import options + + +def _is_dtype_numeric(dtype) -> bool: + """Check if a dtype is numeric for alignment purposes.""" + return pandas.api.types.is_numeric_dtype(dtype) + + +def render_html( + *, + dataframe: pd.DataFrame, + table_id: str, +) -> str: + """Render a pandas DataFrame to HTML with specific styling.""" + classes = "dataframe table table-striped table-hover" + table_html = [f''] + precision = options.display.precision + + # Render table head + table_html.append(" ") + table_html.append(' ') + for col in dataframe.columns: + table_html.append( + f' ' + ) + table_html.append(" ") + table_html.append(" ") + + # Render table body + table_html.append(" ") + for i in range(len(dataframe)): + table_html.append(" ") + row = dataframe.iloc[i] + for col_name, value in row.items(): + dtype = dataframe.dtypes.loc[col_name] # type: ignore + align = "right" if _is_dtype_numeric(dtype) else "left" + table_html.append( + ' ") + table_html.append(" ") + table_html.append(" ") + table_html.append("
{html.escape(str(col))}
'.format(align) + ) + + # TODO(b/438181139): Consider semi-exploding ARRAY/STRUCT columns + # into multiple rows/columns like the BQ UI does. + if pandas.api.types.is_scalar(value) and pd.isna(value): + table_html.append(' <NA>') + else: + if isinstance(value, float): + formatted_value = f"{value:.{precision}f}" + table_html.append(f" {html.escape(formatted_value)}") + else: + table_html.append(f" {html.escape(str(value))}") + table_html.append("
") + + return "\n".join(table_html) diff --git a/notebooks/.gitignore b/notebooks/.gitignore new file mode 100644 index 0000000000..87620ac7e7 --- /dev/null +++ b/notebooks/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints/ diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index f6380a9fd4..617329ba65 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -55,6 +55,7 @@ "metadata": {}, "outputs": [], "source": [ + "bpd.options.bigquery.ordering_mode = \"partial\"\n", "bpd.options.display.repr_mode = \"anywidget\"" ] }, @@ -75,7 +76,7 @@ { "data": { "text/html": [ - "Query job c5fcfd5e-1617-49c8-afa3-86ca21019de4 is DONE. 0 Bytes processed. Open Job" + "Query job a643d120-4af9-44fc-ba3c-ed461cf1092b is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -115,7 +116,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 171.4 MB\n" + "Computation deferred. Computation will process 44.4 MB\n" ] } ], @@ -138,27 +139,15 @@ "id": "ce250157", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "Query job ab900a53-5011-4e80-85d5-0ef2958598db is DONE. 171.4 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bda63ba739dc4d5f83a5e18eb27b2686", + "model_id": "d2d4ef22ea9f414b89ea5bd85f0e6635", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "TableWidget(row_count=5552452, table_html='` and `
`) have `style="text-align: right;"`. + - Assert that the string and boolean columns have `style="text-align: left;"`. +- [ ] **Scenario 2: Precision.** + - Create a `bigframes.dataframe.DataFrame` with a `FLOAT64` column containing a number with many decimal places (e.g., `3.14159265`). + - Set `bigframes.options.display.precision = 4`. + - Render the `pandas.DataFrame` to HTML. + - Assert that the output string contains the number formatted to 4 decimal places (e.g., `3.1416`). + - Remember to reset the option value after the test to avoid side effects. +- [ ] Check these items off with `[x]` as they are completed. + +## Verification + +*Specify the commands to run to verify the changes.* + +- [ ] The `nox -r -s format lint lint_setup_py` linter should pass. +- [ ] The `nox -r -s mypy` static type checker should pass. +- [ ] The `nox -r -s docs docfx` docs should successfully build and include relevant docs in the output. +- [ ] All new and existing unit tests `pytest tests/unit` should pass. +- [ ] Identify all related system tests in the `tests/system` directories. +- [ ] All related system tests `pytest tests/system/small/path_to_relevant_test.py::test_name` should pass. +- [ ] Check these items off with `[x]` as they are completed. + +## Constraints + +Follow the guidelines listed in GEMINI.md at the root of the repository. diff --git a/tests/unit/_config/test_threaded_options.py b/tests/unit/_config/test_threaded_options.py index 7fc97a9f72..b16a3550bc 100644 --- a/tests/unit/_config/test_threaded_options.py +++ b/tests/unit/_config/test_threaded_options.py @@ -37,5 +37,5 @@ def mutate_options_threaded(options, result_dict): assert result_dict["this_before"] == 50 assert result_dict["this_after"] == 50 - assert result_dict["other_before"] == 25 + assert result_dict["other_before"] == 10 assert result_dict["other_after"] == 100 diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py new file mode 100644 index 0000000000..fcf1455362 --- /dev/null +++ b/tests/unit/display/test_html.py @@ -0,0 +1,151 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pandas as pd +import pyarrow as pa +import pytest + +import bigframes as bf +import bigframes.display.html as bf_html + + +@pytest.mark.parametrize( + ("data", "expected_alignments", "expected_strings"), + [ + pytest.param( + { + "string_col": ["a", "b", "c"], + "int_col": [1, 2, 3], + "float_col": [1.1, 2.2, 3.3], + "bool_col": [True, False, True], + }, + { + "string_col": "left", + "int_col": "right", + "float_col": "right", + "bool_col": "left", + }, + ["1.100000", "2.200000", "3.300000"], + id="scalars", + ), + pytest.param( + { + "timestamp_col": pa.array( + [ + datetime.datetime.fromisoformat(value) + for value in [ + "2024-01-01 00:00:00", + "2024-01-01 00:00:01", + "2024-01-01 00:00:02", + ] + ], + pa.timestamp("us", tz="UTC"), + ), + "datetime_col": pa.array( + [ + datetime.datetime.fromisoformat(value) + for value in [ + "2027-06-05 04:03:02.001", + "2027-01-01 00:00:01", + "2027-01-01 00:00:02", + ] + ], + pa.timestamp("us"), + ), + "date_col": pa.array( + [ + datetime.date(1999, 1, 1), + datetime.date(1999, 1, 2), + datetime.date(1999, 1, 3), + ], + pa.date32(), + ), + "time_col": pa.array( + [ + datetime.time(11, 11, 0), + datetime.time(11, 11, 1), + datetime.time(11, 11, 2), + ], + pa.time64("us"), + ), + }, + { + "timestamp_col": "left", + "datetime_col": "left", + "date_col": "left", + "time_col": "left", + }, + [ + "2024-01-01 00:00:00", + "2027-06-05 04:03:02.001", + "1999-01-01", + "11:11:01", + ], + id="datetimes", + ), + pytest.param( + { + "array_col": pd.Series( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ), + }, + { + "array_col": "left", + }, + ["[1, 2, 3]", "[4, 5, 6]", "[7, 8, 9]"], + id="array", + ), + pytest.param( + { + "struct_col": pd.Series( + [{"v": 1}, {"v": 2}, {"v": 3}], + dtype=pd.ArrowDtype(pa.struct([("v", pa.int64())])), + ), + }, + { + "struct_col": "left", + }, + ["{'v': 1}", "{'v': 2}", "{'v': 3}"], + id="struct", + ), + ], +) +def test_render_html_alignment_and_precision( + data, expected_alignments, expected_strings +): + df = pd.DataFrame(data) + html = bf_html.render_html(dataframe=df, table_id="test-table") + + for _, align in expected_alignments.items(): + assert 'th style="text-align: left;"' in html + assert f'>> df.melt() - variable value - 0 A 1.0 - 1 A - 2 A 3.0 - 3 A 4.0 - 4 A 5.0 - 5 B 1.0 - 6 B 2.0 - 7 B 3.0 - 8 B 4.0 - 9 B 5.0 - 10 C - 11 C 3.5 - 12 C - 13 C 4.5 - 14 C 5.0 + variable value + 0 A 1.0 + 1 A + 2 A 3.0 + 3 A 4.0 + 4 A 5.0 + 5 B 1.0 + 6 B 2.0 + 7 B 3.0 + 8 B 4.0 + 9 B 5.0 + ... [15 rows x 2 columns]