diff --git a/lib/streamlit/dataframe_util.py b/lib/streamlit/dataframe_util.py index 8b228221fa3..a55a4646d53 100644 --- a/lib/streamlit/dataframe_util.py +++ b/lib/streamlit/dataframe_util.py @@ -41,12 +41,14 @@ from streamlit import config, errors, logger, string_util from streamlit.type_util import ( CustomDict, + dump_pydantic_sequence, has_callable_attr, is_custom_dict, is_dataclass_instance, is_list_like, is_namedtuple, is_pydantic_model, + is_sequence_of_pydantic_models, is_type, is_version_less_than, ) @@ -703,6 +705,14 @@ def convert_anything_to_pandas_df( if is_snowpark_row_list(data): return pd.DataFrame([row.as_dict() for row in data]) + if is_sequence_of_pydantic_models(data): + # Try to convert pydantic models to DataFrame. If some elements are not + # pydantic models (mixed sequence), fall through to pandas' native handling. + try: + return pd.DataFrame(dump_pydantic_sequence(data)) + except AttributeError: + pass + if has_callable_attr(data, "to_pandas"): return pd.DataFrame(data.to_pandas()) @@ -1242,7 +1252,7 @@ def determine_data_format(input_data: Any) -> DataFormat: # This should always contain at least one element, # otherwise the values type from infer_dtype would have been empty first_element = next(iter(input_data)) - if isinstance(first_element, dict): + if isinstance(first_element, dict) or is_pydantic_model(first_element): return DataFormat.LIST_OF_RECORDS if isinstance(first_element, (list, tuple, set, frozenset)): return DataFormat.LIST_OF_ROWS diff --git a/lib/streamlit/elements/json.py b/lib/streamlit/elements/json.py index 608d009cc64..84165056d8e 100644 --- a/lib/streamlit/elements/json.py +++ b/lib/streamlit/elements/json.py @@ -27,10 +27,12 @@ from streamlit.proto.Json_pb2 import Json as JsonProto from streamlit.runtime.metrics_util import gather_metrics from streamlit.type_util import ( + dump_pydantic_sequence, is_custom_dict, is_list_like, is_namedtuple, is_pydantic_model, + is_sequence_of_pydantic_models, ) if TYPE_CHECKING: @@ -120,7 +122,14 @@ def json( body = dict(body) # type: ignore if is_list_like(body): - body = list(body) # ty: ignore[invalid-argument-type] + if is_sequence_of_pydantic_models(body): + try: + body = dump_pydantic_sequence(body) + except AttributeError: + # Fallback to list(body) if it contains non-Pydantic models: + body = list(body) # ty: ignore[invalid-argument-type] + else: + body = list(body) # ty: ignore[invalid-argument-type] if not isinstance(body, str): try: diff --git a/lib/streamlit/type_util.py b/lib/streamlit/type_util.py index 3ae7a4a95e3..5b5fb99d7fe 100644 --- a/lib/streamlit/type_util.py +++ b/lib/streamlit/type_util.py @@ -331,6 +331,24 @@ def is_pydantic_model(obj: object) -> bool: return _is_type_instance(obj, "pydantic.main.BaseModel") +def is_sequence_of_pydantic_models(obj: object) -> TypeGuard[Sequence[Any]]: + """True if obj is a non-empty list/tuple/set/frozenset of Pydantic model instances.""" + if not isinstance(obj, (list, tuple, set, frozenset)) or len(obj) == 0: + return False + first_element = next(iter(obj)) + return is_pydantic_model(first_element) + + +def dump_pydantic_sequence(obj: Sequence[object]) -> list[dict[str, Any]]: + """Dump a sequence of Pydantic models to a list of dictionaries.""" + first_element = next(iter(obj)) + # Pydantic v2 uses model_dump(), v1 uses dict() + if has_callable_attr(first_element, "model_dump"): + # Use mode="json" to ensure proper serialization of types like Decimal + return [item.model_dump(mode="json") for item in obj] # type: ignore + return [item.dict() for item in obj] # type: ignore + + def _is_from_streamlit(obj: object) -> bool: """True if the object is from the streamlit package.""" return obj.__class__.__module__.startswith("streamlit") diff --git a/lib/tests/streamlit/data_test_cases.py b/lib/tests/streamlit/data_test_cases.py index d26b356b170..58fb5cfc0b1 100644 --- a/lib/tests/streamlit/data_test_cases.py +++ b/lib/tests/streamlit/data_test_cases.py @@ -1250,6 +1250,61 @@ class ElementPydanticModel(BaseModel): dict, ), ), + ( + "List of Pydantic Models", + [ + ElementPydanticModel( + name="st.number_input", is_widget=True, usage=0.32 + ), + ElementPydanticModel( + name="st.text_input", is_widget=True, usage=0.45 + ), + ], + CaseMetadata( + 2, + 3, + DataFormat.LIST_OF_RECORDS, + [ + ElementPydanticModel( + name="st.number_input", is_widget=True, usage=0.32 + ), + ElementPydanticModel( + name="st.text_input", is_widget=True, usage=0.45 + ), + ], + "json", + False, + list, + ), + ), + ( + "Tuple of Pydantic Models", + ( + ElementPydanticModel( + name="st.number_input", is_widget=True, usage=0.32 + ), + ElementPydanticModel( + name="st.text_input", is_widget=True, usage=0.45 + ), + ), + CaseMetadata( + 2, + 3, + DataFormat.LIST_OF_RECORDS, + [ + ElementPydanticModel( + name="st.number_input", is_widget=True, usage=0.32 + ), + ElementPydanticModel( + name="st.text_input", is_widget=True, usage=0.45 + ), + ], + "json", + False, + # LIST_OF_RECORDS always converts back to list, not tuple + list, + ), + ), ] ) except ModuleNotFoundError: