Thanks to visit codestin.com
Credit goes to github.com

Skip to content

docs: deprecate default model in TextEmbeddingGenerator, GeminiTextGenerator, and other bigframes.ml.llm classes #1570

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 55 additions & 24 deletions bigframes/ml/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@
"You should use this model name only if you are sure that it is supported in BigQuery."
)

_REMOVE_DEFAULT_MODEL_WARNING = "Since upgrading the default model can cause unintended breakages, the default model will be removed in BigFrames 3.0. Please supply an explicit model to avoid this message."


@log_adapter.class_logger
class TextEmbeddingGenerator(base.RetriableRemotePredictor):
Expand All @@ -113,7 +115,8 @@ class TextEmbeddingGenerator(base.RetriableRemotePredictor):
The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
or "text-multilingual-embedding-002". text-embedding models returns model embeddings for text inputs.
text-multilingual-embedding models returns model embeddings for text inputs which support over 100 languages.
Default to "text-embedding-004".
If no setting is provided, "text-embedding-004" will be used by
default and a warning will be issued.
session (bigframes.Session or None):
BQ session to create the model. If None, use the global default session.
connection_name (str or None):
Expand All @@ -124,14 +127,20 @@ class TextEmbeddingGenerator(base.RetriableRemotePredictor):
def __init__(
self,
*,
model_name: Literal[
"text-embedding-005",
"text-embedding-004",
"text-multilingual-embedding-002",
] = "text-embedding-004",
model_name: Optional[
Literal[
"text-embedding-005",
"text-embedding-004",
"text-multilingual-embedding-002",
]
] = None,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
):
if model_name is None:
model_name = "text-embedding-004"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)
self.model_name = model_name
self.session = session or global_session.get_global_session()
self.connection_name = connection_name
Expand Down Expand Up @@ -256,7 +265,8 @@ class MultimodalEmbeddingGenerator(base.RetriableRemotePredictor):
Args:
model_name (str, Default to "multimodalembedding@001"):
The model for multimodal embedding. Can set to "multimodalembedding@001". Multimodal-embedding models returns model embeddings for text, image and video inputs.
Default to "multimodalembedding@001".
If no setting is provided, "multimodalembedding@001" will be used by
default and a warning will be issued.
session (bigframes.Session or None):
BQ session to create the model. If None, use the global default session.
connection_name (str or None):
Expand All @@ -267,12 +277,16 @@ class MultimodalEmbeddingGenerator(base.RetriableRemotePredictor):
def __init__(
self,
*,
model_name: Literal["multimodalembedding@001"] = "multimodalembedding@001",
model_name: Optional[Literal["multimodalembedding@001"]] = None,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
):
if not bigframes.options.experiments.blob:
raise NotImplementedError()
if model_name is None:
model_name = "multimodalembedding@001"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)
self.model_name = model_name
self.session = session or global_session.get_global_session()
self.connection_name = connection_name
Expand Down Expand Up @@ -408,7 +422,8 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
"gemini-1.5-pro-001", "gemini-1.5-pro-002", "gemini-1.5-flash-001",
"gemini-1.5-flash-002", "gemini-2.0-flash-exp",
"gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001".
Default to "gemini-2.0-flash-001".
If no setting is provided, "gemini-2.0-flash-001" will be used by
default and a warning will be issued.

.. note::
"gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
Expand All @@ -429,17 +444,19 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
def __init__(
self,
*,
model_name: Literal[
"gemini-1.5-pro-preview-0514",
"gemini-1.5-flash-preview-0514",
"gemini-1.5-pro-001",
"gemini-1.5-pro-002",
"gemini-1.5-flash-001",
"gemini-1.5-flash-002",
"gemini-2.0-flash-exp",
"gemini-2.0-flash-001",
"gemini-2.0-flash-lite-001",
] = "gemini-2.0-flash-001",
model_name: Optional[
Literal[
"gemini-1.5-pro-preview-0514",
"gemini-1.5-flash-preview-0514",
"gemini-1.5-pro-001",
"gemini-1.5-pro-002",
"gemini-1.5-flash-001",
"gemini-1.5-flash-002",
"gemini-2.0-flash-exp",
"gemini-2.0-flash-001",
"gemini-2.0-flash-lite-001",
]
] = None,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
max_iterations: int = 300,
Expand All @@ -454,6 +471,10 @@ def __init__(
"(https://cloud.google.com/products#product-launch-stages)."
)
warnings.warn(msg, category=exceptions.PreviewWarning)
if model_name is None:
model_name = "gemini-2.0-flash-001"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)
self.model_name = model_name
self.session = session or global_session.get_global_session()
self.max_iterations = max_iterations
Expand Down Expand Up @@ -803,7 +824,8 @@ class Claude3TextGenerator(base.RetriableRemotePredictor):
"claude-3-5-sonnet" is Anthropic's most powerful AI model and maintains the speed and cost of Claude 3 Sonnet, which is a mid-tier model.
"claude-3-opus" is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks.
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#available-claude-models
Default to "claude-3-sonnet".
If no setting is provided, "claude-3-sonnet" will be used by default
and a warning will be issued.
session (bigframes.Session or None):
BQ session to create the model. If None, use the global default session.
connection_name (str or None):
Expand All @@ -815,12 +837,21 @@ class Claude3TextGenerator(base.RetriableRemotePredictor):
def __init__(
self,
*,
model_name: Literal[
"claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"
] = "claude-3-sonnet",
model_name: Optional[
Literal[
"claude-3-sonnet",
"claude-3-haiku",
"claude-3-5-sonnet",
"claude-3-opus",
]
] = None,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
):
if model_name is None:
model_name = "claude-3-sonnet"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)
self.model_name = model_name
self.session = session or global_session.get_global_session()
self.connection_name = connection_name
Expand Down
16 changes: 16 additions & 0 deletions tests/system/small/ml/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,3 +762,19 @@ def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name)
def test_gemini_preview_model_warnings(model_name):
with pytest.warns(exceptions.PreviewWarning):
llm.GeminiTextGenerator(model_name=model_name)


@pytest.mark.parametrize(
    "model_class",
    [
        llm.TextEmbeddingGenerator,
        llm.MultimodalEmbeddingGenerator,
        llm.GeminiTextGenerator,
        llm.Claude3TextGenerator,
    ],
)
def test_text_embedding_generator_no_default_model_warning(model_class):
    """Each llm model class must emit a FutureWarning when model_name is omitted.

    The implicit default model is deprecated and scheduled for removal in
    BigFrames 3.0, so constructing any of these models with
    ``model_name=None`` should warn the caller to supply an explicit model.
    """
    # exceptions.format_message wraps the warning text, so the expected
    # message contains the wrapped (newline-broken) form of
    # _REMOVE_DEFAULT_MODEL_WARNING.
    message = (
        "Since upgrading the default model can cause unintended breakages, the\n"
        "default model will be removed in BigFrames 3.0. Please supply an\n"
        "explicit model to avoid this message."
    )
    # MultimodalEmbeddingGenerator requires the blob experiment flag; save and
    # restore the global option so this test does not leak state into the rest
    # of the suite.
    original_blob_setting = bigframes.options.experiments.blob
    bigframes.options.experiments.blob = True
    try:
        # pytest.warns treats `match` as a regex (re.search); escape the
        # literal periods so the assertion pins the exact message text.
        with pytest.warns(FutureWarning, match=re.escape(message)):
            model_class(model_name=None)
    finally:
        bigframes.options.experiments.blob = original_blob_setting