From cb8d1237e85ccfe7ff09a09e08c58b452d1d8da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 11 Aug 2025 14:00:46 +0000 Subject: [PATCH 1/7] feat: add `bigframes.pandas.options.display.precision` option feat: when using `repr_mode = "anywidget"`, numeric values align right --- bigframes/_config/display_options.py | 6 + bigframes/display/anywidget.py | 8 +- bigframes/display/html.py | 74 ++++++++++ notebooks/.gitignore | 1 + notebooks/dataframes/anywidget_mode.ipynb | 57 ++++---- specs/2025-08-11-anywidget-align-text.md | 132 ++++++++++++++++++ tests/unit/display/test_html.py | 66 +++++++++ .../pandas/core/config_init.py | 3 + 8 files changed, 316 insertions(+), 31 deletions(-) create mode 100644 bigframes/display/html.py create mode 100644 notebooks/.gitignore create mode 100644 specs/2025-08-11-anywidget-align-text.md create mode 100644 tests/unit/display/test_html.py diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py index 430abc8ef0..0ae94572db 100644 --- a/bigframes/_config/display_options.py +++ b/bigframes/_config/display_options.py @@ -26,8 +26,12 @@ class DisplayOptions: __doc__ = vendored_pandas_config.display_options_doc + # Options borrowed from pandas. max_columns: int = 20 max_rows: int = 25 + precision: int = 6 + + # Options unique to BigQuery DataFrames. progress_bar: Optional[str] = "auto" repr_mode: Literal["head", "deferred", "anywidget"] = "head" @@ -52,6 +56,8 @@ def pandas_repr(display_options: DisplayOptions): display_options.max_columns, "display.max_rows", display_options.max_rows, + "display.precision", + display_options.precision, "display.show_dimensions", True, ) as pandas_context: diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 8bbb72c11a..5a20ddcb7f 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -23,6 +23,7 @@ import pandas as pd import bigframes +import bigframes.display.html # anywidget and traitlets are optional dependencies. We don't want the import of this # module to fail if they aren't installed, though. Instead, we try to limit the surface that @@ -201,12 +202,9 @@ def _set_table_html(self): page_data = cached_data.iloc[start:end] # Generate HTML table - self.table_html = page_data.to_html( - index=False, - max_rows=None, + self.table_html = bigframes.display.html.render_html( + dataframe=page_data, table_id=f"table-{self._table_id}", - classes="table table-striped table-hover", - escape=False, ) @traitlets.observe("page") diff --git a/bigframes/display/html.py b/bigframes/display/html.py new file mode 100644 index 0000000000..33ba5d5ff9 --- /dev/null +++ b/bigframes/display/html.py @@ -0,0 +1,74 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""HTML rendering for DataFrames and other objects.""" + +from __future__ import annotations + +import html + +import pandas as pd +import pandas.api.types + +from bigframes._config import options + + +def _is_dtype_numeric(dtype) -> bool: + """Check if a dtype is numeric for alignment purposes.""" + return pandas.api.types.is_numeric_dtype(dtype) + + +def render_html( + *, + dataframe: pd.DataFrame, + table_id: str, +) -> str: + """Render a pandas DataFrame to HTML with specific styling.""" + classes = "table table-striped table-hover" + table_html = [f''] + precision = options.display.precision + + # Render table head + table_html.append(" ") + table_html.append(' ') + for col in dataframe.columns: + table_html.append( + f' ' + ) + table_html.append(" ") + table_html.append(" ") + + # Render table body + table_html.append(" ") + for i in range(len(dataframe)): + table_html.append(" ") + row = dataframe.iloc[i] + for col_name, value in row.items(): + dtype = dataframe.dtypes.loc[col_name] # type: ignore + align = "right" if _is_dtype_numeric(dtype) else "left" + table_html.append(' ") + table_html.append(" ") + table_html.append(" ") + table_html.append("
{html.escape(str(col))}
'.format(align)) + if pd.isna(value): + table_html.append(" NaN") + else: + if isinstance(value, float): + formatted_value = f"{value:.{precision}f}" + table_html.append(f" {html.escape(formatted_value)}") + else: + table_html.append(f" {html.escape(str(value))}") + table_html.append("
") + + return "\n".join(table_html) diff --git a/notebooks/.gitignore b/notebooks/.gitignore new file mode 100644 index 0000000000..87620ac7e7 --- /dev/null +++ b/notebooks/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints/ diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index f6380a9fd4..dda82e15c8 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -55,6 +55,7 @@ "metadata": {}, "outputs": [], "source": [ + "bpd.options.bigquery.ordering_mode = \"partial\"\n", "bpd.options.display.repr_mode = \"anywidget\"" ] }, @@ -75,7 +76,7 @@ { "data": { "text/html": [ - "Query job c5fcfd5e-1617-49c8-afa3-86ca21019de4 is DONE. 0 Bytes processed. Open Job" + "Query job 70c9da87-db73-4794-8ad7-a3824a307fb7 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -115,7 +116,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 171.4 MB\n" + "Computation deferred. Computation will process 44.4 MB\n" ] } ], @@ -138,27 +139,15 @@ "id": "ce250157", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "Query job ab900a53-5011-4e80-85d5-0ef2958598db is DONE. 171.4 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bda63ba739dc4d5f83a5e18eb27b2686", + "model_id": "7d9e68222fcc445a8a6c4ad3360495bc", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "TableWidget(row_count=5552452, table_html='` and `') for col in dataframe.columns: table_html.append( - f' ' + f' ' ) table_html.append(" ") table_html.append(" ") # Render table body - table_html.append(" ") + table_html.append(' ') for i in range(len(dataframe)): table_html.append(" ") row = dataframe.iloc[i] diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index dda82e15c8..617329ba65 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -76,7 +76,7 @@ { "data": { "text/html": [ - "Query job 70c9da87-db73-4794-8ad7-a3824a307fb7 is DONE. 0 Bytes processed. Open Job" + "Query job a643d120-4af9-44fc-ba3c-ed461cf1092b is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -142,12 +142,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7d9e68222fcc445a8a6c4ad3360495bc", + "model_id": "d2d4ef22ea9f414b89ea5bd85f0e6635", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "TableWidget(row_count=5552452, table_html='
`) have `style="text-align: right;"`. + - Assert that the string and boolean columns have `style="text-align: left;"`. +- [ ] **Scenario 2: Precision.** + - Create a `bigframes.dataframe.DataFrame` with a `FLOAT64` column containing a number with many decimal places (e.g., `3.14159265`). + - Set `bigframes.options.display.precision = 4`. + - Render the `pandas.DataFrame` to HTML. + - Assert that the output string contains the number formatted to 4 decimal places (e.g., `3.1416`). + - Remember to reset the option value after the test to avoid side effects. +- [ ] Check these items off with `[x]` as they are completed. + +## Verification + +*Specify the commands to run to verify the changes.* + +- [ ] The `nox -r -s format lint lint_setup_py` linter should pass. +- [ ] The `nox -r -s mypy` static type checker should pass. +- [ ] The `nox -r -s docs docfx` docs should successfully build and include relevant docs in the output. +- [ ] All new and existing unit tests `pytest tests/unit` should pass. +- [ ] Identify all related system tests in the `tests/system` directories. +- [ ] All related system tests `pytest tests/system/small/path_to_relevant_test.py::test_name` should pass. +- [ ] Check these items off with `[x]` as they are completed. + +## Constraints + +Follow the guidelines listed in GEMINI.md at the root of the repository. diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py new file mode 100644 index 0000000000..7eba1b2b68 --- /dev/null +++ b/tests/unit/display/test_html.py @@ -0,0 +1,66 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd +import pytest + +import bigframes as bf +import bigframes.display.html as bf_html + + +@pytest.mark.parametrize( + ("data", "expected_alignments", "expected_strings"), + [ + ( + { + "string_col": ["a", "b", "c"], + "int_col": [1, 2, 3], + "float_col": [1.1, 2.2, 3.3], + "bool_col": [True, False, True], + }, + { + "string_col": "left", + "int_col": "right", + "float_col": "right", + "bool_col": "left", + }, + ["1.100000", "2.200000", "3.300000"], + ), + ], +) +def test_render_html_alignment_and_precision( + data, expected_alignments, expected_strings +): + df = pd.DataFrame(data) + html = bf_html.render_html(dataframe=df, table_id="test-table") + + for _, align in expected_alignments.items(): + assert 'th style="text-align: left;"' in html + assert f'' in html + + for expected_string in expected_strings: + assert expected_string in html + + +def test_render_html_precision(): + data = {"float_col": [3.14159265]} + df = pd.DataFrame(data) + + with bf.option_context("display.precision", 4): + html = bf_html.render_html(dataframe=df, table_id="test-table") + assert "3.1416" in html + + # Make sure we reset to default + html = bf_html.render_html(dataframe=df, table_id="test-table") + assert "3.141593" in html diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index 51d056a2c8..3425674e4f 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -84,6 +84,9 @@ memory_usage (bool): This specifies if the memory usage of a DataFrame should be displayed when df.info() is called. Valid values True,False, + precision (int): + Controls the floating point output precision, similar to + `pandas.options.display.precision`. blob_display (bool): Whether to display the blob content in notebook DataFrame preview. Default True. blob_display_width (int or None): From 91f90566f9a88b25dc99965e1e814fed5c7897ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Tue, 12 Aug 2025 15:03:16 +0000 Subject: [PATCH 2/7] include traceback in anywidet warning --- bigframes/dataframe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 4559d7cbb9..f9113e11c0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -23,6 +23,7 @@ import re import sys import textwrap +import traceback import typing from typing import ( Callable, @@ -814,7 +815,9 @@ def _repr_html_(self) -> str: except (AttributeError, ValueError, ImportError): # Fallback if anywidget is not available warnings.warn( - "Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode." + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to deferred mode. Error: {traceback.format_exc()}" ) return formatter.repr_query_job(self._compute_dry_run()) From 6e49352037955f9006ab6cf545c0c354904b6d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Tue, 12 Aug 2025 15:38:50 +0000 Subject: [PATCH 3/7] add tests for more complex data types --- bigframes/display/html.py | 7 ++- tests/unit/display/test_html.py | 87 ++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 33ba5d5ff9..1b70f46615 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -58,8 +58,11 @@ def render_html( dtype = dataframe.dtypes.loc[col_name] # type: ignore align = "right" if _is_dtype_numeric(dtype) else "left" table_html.append(' '.format(align)) - if pd.isna(value): - table_html.append(" NaN") + + # TODO(b/438181139): Consider semi-exploding ARRAY/STRUCT columns + # into multiple rows/columns like the BQ UI does. + if pandas.api.types.is_scalar(value) and pd.isna(value): + table_html.append(' <NA>') else: if isinstance(value, float): formatted_value = f"{value:.{precision}f}" diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py index 7eba1b2b68..e0708487b0 100644 --- a/tests/unit/display/test_html.py +++ b/tests/unit/display/test_html.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime + import pandas as pd +import pyarrow as pa import pytest import bigframes as bf @@ -22,7 +25,7 @@ @pytest.mark.parametrize( ("data", "expected_alignments", "expected_strings"), [ - ( + pytest.param( { "string_col": ["a", "b", "c"], "int_col": [1, 2, 3], @@ -36,6 +39,88 @@ "bool_col": "left", }, ["1.100000", "2.200000", "3.300000"], + id="scalars", + ), + pytest.param( + { + "timestamp_col": pa.array( + [ + datetime.datetime.fromisoformat(value) + for value in [ + "2024-01-01 00:00:00", + "2024-01-01 00:00:01", + "2024-01-01 00:00:02", + ] + ], + pa.timestamp("us", tz="UTC"), + ), + "datetime_col": pa.array( + [ + datetime.datetime.fromisoformat(value) + for value in [ + "2027-06-05 04:03:02.001", + "2027-01-01 00:00:01", + "2027-01-01 00:00:02", + ] + ], + pa.timestamp("us"), + ), + "date_col": pa.array( + [ + datetime.date(1999, 1, 1), + datetime.date(1999, 1, 2), + datetime.date(1999, 1, 3), + ], + pa.date32(), + ), + "time_col": pa.array( + [ + datetime.time(11, 11, 0), + datetime.time(11, 11, 1), + datetime.time(11, 11, 2), + ], + pa.time64("us"), + ), + }, + { + "timestamp_col": "left", + "datetime_col": "left", + "date_col": "left", + "time_col": "left", + }, + [ + "2024-01-01 00:00:00", + "2027-06-05 04:03:02.001", + "1999-01-01", + "11:11:01", + ], + id="datetimes", + ), + pytest.param( + { + "array_col": pd.Series( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ), + }, + { + "array_col": "left", + }, + ["[1, 2, 3]", "[4, 5, 6]", "[7, 8, 9]"], + id="array", + ), + pytest.param( + { + "struct_col": pd.Series( + [{"v": 1}, {"v": 2}, {"v": 3}], + dtype=pd.ArrowDtype(pa.struct([("v", pa.int64())])), + ), + }, + { + "struct_col": "left", + }, + ["{'v': 1}", "{'v': 2}", "{'v': 3}"], + id="struct", ), ], ) From 214969db2baa98d46b47a57cdc0362bff4dbe051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Tue, 12 Aug 2025 16:13:41 +0000 Subject: [PATCH 4/7] add padding --- bigframes/_config/display_options.py | 2 +- bigframes/display/html.py | 4 ++-- notebooks/dataframes/anywidget_mode.ipynb | 20 ++++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py index 0ae94572db..360292dd80 100644 --- a/bigframes/_config/display_options.py +++ b/bigframes/_config/display_options.py @@ -28,7 +28,7 @@ class DisplayOptions: # Options borrowed from pandas. max_columns: int = 20 - max_rows: int = 25 + max_rows: int = 10 precision: int = 6 # Options unique to BigQuery DataFrames. diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 1b70f46615..175ab62975 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -44,13 +44,13 @@ def render_html( table_html.append('
{html.escape(str(col))}
{html.escape(str(col))}
'] precision = options.display.precision @@ -50,14 +50,16 @@ def render_html( table_html.append(" ") # Render table body - table_html.append(' ') + table_html.append(" ") for i in range(len(dataframe)): table_html.append(" ") row = dataframe.iloc[i] for col_name, value in row.items(): dtype = dataframe.dtypes.loc[col_name] # type: ignore align = "right" if _is_dtype_numeric(dtype) else "left" - table_html.append('
'.format(align)) + table_html.append( + ' '.format(align) + ) # TODO(b/438181139): Consider semi-exploding ARRAY/STRUCT columns # into multiple rows/columns like the BQ UI does. diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py index e0708487b0..fcf1455362 100644 --- a/tests/unit/display/test_html.py +++ b/tests/unit/display/test_html.py @@ -132,7 +132,7 @@ def test_render_html_alignment_and_precision( for _, align in expected_alignments.items(): assert 'th style="text-align: left;"' in html - assert f'' in html + assert f' Date: Tue, 12 Aug 2025 19:18:14 +0000 Subject: [PATCH 7/7] fix doctest --- .../bigframes_vendored/pandas/core/frame.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 1f79c428c1..d3b8b51b65 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -5972,22 +5972,18 @@ def melt(self, id_vars, value_vars, var_name, value_name): Using `melt` without optional arguments: >>> df.melt() - variable value - 0 A 1.0 - 1 A - 2 A 3.0 - 3 A 4.0 - 4 A 5.0 - 5 B 1.0 - 6 B 2.0 - 7 B 3.0 - 8 B 4.0 - 9 B 5.0 - 10 C - 11 C 3.5 - 12 C - 13 C 4.5 - 14 C 5.0 + variable value + 0 A 1.0 + 1 A + 2 A 3.0 + 3 A 4.0 + 4 A 5.0 + 5 B 1.0 + 6 B 2.0 + 7 B 3.0 + 8 B 4.0 + 9 B 5.0 + ... [15 rows x 2 columns]