From 6727fb3761e91d32359e13b1c28494d2f16d165a Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Thu, 10 Jul 2025 20:07:44 -0700 Subject: [PATCH 01/13] feat: GenAI SDK client(evals) - Add Generate Rubrics API config and internal method PiperOrigin-RevId: 781795016 --- .../replays/test_internal_generate_rubrics.py | 170 +++++++++++++ vertexai/_genai/evals.py | 188 ++++++++++++++ vertexai/_genai/types.py | 240 ++++++++++++++++++ 3 files changed, 598 insertions(+) create mode 100644 tests/unit/vertexai/genai/replays/test_internal_generate_rubrics.py diff --git a/tests/unit/vertexai/genai/replays/test_internal_generate_rubrics.py b/tests/unit/vertexai/genai/replays/test_internal_generate_rubrics.py new file mode 100644 index 0000000000..ce8e24138c --- /dev/null +++ b/tests/unit/vertexai/genai/replays/test_internal_generate_rubrics.py @@ -0,0 +1,170 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + + +from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types + +_TEST_RUBRIC_GENERATION_PROMPT = """SPECIAL INSTRUCTION: think silently. Silent thinking token budget: 16384. + +You are a teacher who is responsible for scoring a student\'s response to a prompt. In order to score that response, you must write down a rubric for each prompt. That rubric states what properties the response must have in order to be a valid response to the prompt. Properties are weighted by importance via the "importance" field. + +Rubric requirements: +- Properties either exist or don\'t exist. +- Properties can be either implicit in the prompt or made explicit by the prompt. +- Make sure to always include the correct expected human language as one of the properties. If the prompt asks for code, the programming language should be covered by a separate property. +- The correct expected language may be explicit in the text of the prompt but is usually simply implicit in the prompt itself. +- Be as comprehensive as possible with the list of properties in the rubric. +- All properties in the rubric must be in English, regardless of the language of the prompt. +- Rubric properties should not specify correct answers in their descriptions, e.g. to math and factoid questions if the prompt calls for such an answer. Rather, it should check that the response contains an answer and optional supporting evidence if relevant, and assume some other process will later validate correctness. A rubric property should however call out any false premises present in the prompt. + +About importance: +- Most properties will be of medium importance by default. +- Properties of high importance are critical to be fulfilled in a good response. +- Properties of low importance are considered optional or supplementary nice-to-haves. + +You will see prompts in many different languages, not just English. For each prompt you see, you will write down this rubric in JSON format. 
+ +IMPORTANT: Never respond to the prompt given. Only write a rubric. + +Example: +What is the tallest building in the world? + +```json +{ + "criteria":[ + { + "rubric_id": "00001", + "property": "The response is in English.", + "type": "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE", + "importance": "high" + }, + { + "rubric_id": "00002", + "property": "Contains the name of the tallest building in the world.", + "type": "QA_ANSWER:FACTOID", + "importance": "high" + }, + { + "rubric_id": "00003", + "property": "Contains the exact height of the tallest building.", + "type": "QA_SUPPORTING_EVIDENCE:HEIGHT", + "importance": "low" + }, + { + "rubric_id": "00004", + "property": "Contains the location of the tallest building.", + "type": "QA_SUPPORTING_EVIDENCE:LOCATION", + "importance": "low" + }, + ... + ] +} +``` + +Write me a letter to my HOA asking them to reconsider the fees they are asking me to pay because I haven\'t mowed my lawn on time. I have been very busy at work. +```json +{ + "criteria": [ + { + "rubric_id": "00001", + "property": "The response is in English.", + "type": "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE", + "importance": "high" + }, + { + "rubric_id": "00002", + "property": "The response is formatted as a letter.", + "type": "FORMAT_REQUIREMENT:FORMAL_LETTER", + "importance": "medium" + }, + { + "rubric_id": "00003", + "property": "The letter is addressed to the Homeowners Association (HOA).", + "type": "CONTENT_REQUIREMENT:ADDRESSEE", + "importance": "medium" + }, + { + "rubric_id": "00004", + "property": "The letter explains that the sender has not mowed their lawn on time.", + "type": "CONTENT_REQUIREMENT:BACKGROUND_CONTEXT:TARDINESS", + "importance": "medium" + }, + { + "rubric_id": "00005", + "property": "The letter provides a reason for not mowing the lawn, specifically being busy at work.", + "type": "CONTENT_REQUIREMENT:EXPLANATION:EXCUSE:BUSY", + "importance": "medium" + }, + { + "rubric_id": "00006", + "property": "The letter discusses that the sender has been in compliance until now.", + "type": "OPTIONAL_CONTENT:SUPPORTING_EVIDENCE:COMPLIANCE", + "importance": "low" + }, + { + "rubric_id": "00007", + "property": "The letter requests that the HOA reconsider the fees associated with not mowing the lawn on time.", + "type": "CONTENT_REQUIREMENT:REQUEST:FEE_WAIVER", + "importance": "high" + }, + { + "rubric_id": "00008", + "property": "The letter maintains a polite and respectful tone.", + "type": "CONTENT_REQUIREMENT:FORMALITY:FORMAL", + "importance": "high" + }, + { + "rubric_id": "00009", + "property": "The letter includes a closing (e.g., \'Sincerely\') and the sender\'s name.", + "type": "CONTENT_REQUIREMENT:SIGNATURE", + "importance": "medium" + } + ] +} +``` + +Now write a rubric for the following user prompt. Remember to write only the rubric, NOT response to the prompt. 
+ +User prompt: +{prompt}""" + + +def test_internal_method_generate_rubrics(client): + """Tests the internal _generate_rubrics method.""" + test_contents = [ + types.Content( + parts=[ + types.Part( + text="Generate a short story about a friendly dragon.", + ), + ], + ) + ] + response = client.evals._generate_rubrics( + contents=test_contents, + rubric_generation_spec=types.RubricGenerationSpec( + prompt_template=_TEST_RUBRIC_GENERATION_PROMPT, + ), + ) + assert len(response.generated_rubrics) >= 1 + + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), + test_method="evals._generate_rubrics", +) diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 1913d7fb71..d06087deed 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -664,6 +664,65 @@ def _EvaluateInstancesRequestParameters_to_vertex( return to_object +def _RubricGenerationSpec_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["prompt_template"]) is not None: + setv( + to_object, + ["promptTemplate"], + getv(from_object, ["prompt_template"]), + ) + + if getv(from_object, ["generator_model_config"]) is not None: + setv( + to_object, + ["model_config"], + getv(from_object, ["generator_model_config"]), + ) + + if getv(from_object, ["rubric_content_type"]) is not None: + setv( + to_object, + ["rubricContentType"], + getv(from_object, ["rubric_content_type"]), + ) + + if getv(from_object, ["rubric_type_ontology"]) is not None: + setv( + to_object, + ["rubricTypeOntology"], + getv(from_object, ["rubric_type_ontology"]), + ) + + return to_object + + +def _GenerateInstanceRubricsRequest_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["contents"]) is not None: + setv(to_object, ["contents"], getv(from_object, ["contents"])) + + if getv(from_object, ["rubric_generation_spec"]) is not None: + setv( + to_object, + ["rubricGenerationSpec"], + _RubricGenerationSpec_to_vertex( + getv(from_object, ["rubric_generation_spec"]), to_object + ), + ) + + if getv(from_object, ["config"]) is not None: + setv(to_object, ["config"], getv(from_object, ["config"])) + + return to_object + + def _EvaluateInstancesResponse_from_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -790,6 +849,21 @@ def _EvaluateInstancesResponse_from_vertex( return to_object +def _GenerateInstanceRubricsResponse_from_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["generatedRubrics"]) is not None: + setv( + to_object, + ["generated_rubrics"], + getv(from_object, ["generatedRubrics"]), + ) + + return to_object + + class Evals(_api_module.BaseModule): def _evaluate_instances( self, @@ -869,6 +943,62 @@ def _evaluate_instances( self._api_client._verify_response(return_value) return return_value + def _generate_rubrics( + self, + *, + contents: list[genai_types.ContentOrDict], + rubric_generation_spec: types.RubricGenerationSpecOrDict, + config: Optional[types.RubricGenerationConfigOrDict] = None, + ) -> types.GenerateInstanceRubricsResponse: + """Generates rubrics for a given prompt.""" + + parameter_model = types._GenerateInstanceRubricsRequest( + contents=contents, + 
rubric_generation_spec=rubric_generation_spec, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError("This method is only supported in the Vertex AI client.") + else: + request_dict = _GenerateInstanceRubricsRequest_to_vertex(parameter_model) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = ":generateInstanceRubrics".format_map(request_url_dict) + else: + path = ":generateInstanceRubrics" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. + request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = self._api_client.request("post", path, request_dict, http_options) + + response_dict = "" if not response.body else json.loads(response.body) + + if self._api_client.vertexai: + response_dict = _GenerateInstanceRubricsResponse_from_vertex(response_dict) + + return_value = types.GenerateInstanceRubricsResponse._from_response( + response=response_dict, kwargs=parameter_model.model_dump() + ) + + self._api_client._verify_response(return_value) + return return_value + def run(self) -> types.EvaluateInstancesResponse: """Evaluates an instance of a model. @@ -1133,6 +1263,64 @@ async def _evaluate_instances( self._api_client._verify_response(return_value) return return_value + async def _generate_rubrics( + self, + *, + contents: list[genai_types.ContentOrDict], + rubric_generation_spec: types.RubricGenerationSpecOrDict, + config: Optional[types.RubricGenerationConfigOrDict] = None, + ) -> types.GenerateInstanceRubricsResponse: + """Generates rubrics for a given prompt.""" + + parameter_model = types._GenerateInstanceRubricsRequest( + contents=contents, + rubric_generation_spec=rubric_generation_spec, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError("This method is only supported in the Vertex AI client.") + else: + request_dict = _GenerateInstanceRubricsRequest_to_vertex(parameter_model) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = ":generateInstanceRubrics".format_map(request_url_dict) + else: + path = ":generateInstanceRubrics" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. 
+ request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = await self._api_client.async_request( + "post", path, request_dict, http_options + ) + + response_dict = "" if not response.body else json.loads(response.body) + + if self._api_client.vertexai: + response_dict = _GenerateInstanceRubricsResponse_from_vertex(response_dict) + + return_value = types.GenerateInstanceRubricsResponse._from_response( + response=response_dict, kwargs=parameter_model.model_dump() + ) + + self._api_client._verify_response(return_value) + return return_value + async def batch_evaluate( self, *, diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py index 951d662f4a..5067131bbd 100644 --- a/vertexai/_genai/types.py +++ b/vertexai/_genai/types.py @@ -218,6 +218,17 @@ class Language(_common.CaseInSensitiveEnum): """Python >= 3.10, with numpy and simpy available.""" +class RubricContentType(_common.CaseInSensitiveEnum): + """Specifies the type of rubric content to generate.""" + + PROPERTY = "PROPERTY" + """Generate rubrics based on properties.""" + NL_QUESTION_ANSWER = "NL_QUESTION_ANSWER" + """Generate rubrics in an NL question answer format.""" + PYTHON_CODE_ASSERTION = "PYTHON_CODE_ASSERTION" + """Generate rubrics in a unit test format.""" + + class GenerateMemoriesResponseGeneratedMemoryAction(_common.CaseInSensitiveEnum): """The action to take.""" @@ -234,6 +245,19 @@ class GenerateMemoriesResponseGeneratedMemoryAction(_common.CaseInSensitiveEnum) """The memory was deleted.""" +class Importance(_common.CaseInSensitiveEnum): + """Importance level of the rubric.""" + + IMPORTANCE_UNSPECIFIED = "IMPORTANCE_UNSPECIFIED" + """Importance is not specified.""" + HIGH = "HIGH" + """High importance.""" + MEDIUM = "MEDIUM" + """Medium importance.""" + LOW = "LOW" + """Low importance.""" + + class BleuInstance(_common.BaseModel): """Bleu instance.""" @@ -2082,6 +2106,222 @@ class EvaluateInstancesResponseDict(TypedDict, total=False): ] +class RubricGenerationSpec(_common.BaseModel): + """Spec for generating rubrics.""" + + prompt_template: Optional[str] = Field( + default=None, + description="""Template for the prompt used to generate rubrics. + The details should be updated based on the most-recent recipe requirements.""", + ) + generator_model_config: Optional[AutoraterConfig] = Field( + default=None, + description="""Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""", + ) + rubric_content_type: Optional[RubricContentType] = Field( + default=None, + description="""The type of rubric content to be generated.""", + ) + rubric_type_ontology: Optional[list[str]] = Field( + default=None, + description="""An optional, pre-defined list of allowed types for generated rubrics. + If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""", + ) + + +class RubricGenerationSpecDict(TypedDict, total=False): + """Spec for generating rubrics.""" + + prompt_template: Optional[str] + """Template for the prompt used to generate rubrics. 
+ The details should be updated based on the most-recent recipe requirements.""" + + generator_model_config: Optional[AutoraterConfigDict] + """Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""" + + rubric_content_type: Optional[RubricContentType] + """The type of rubric content to be generated.""" + + rubric_type_ontology: Optional[list[str]] + """An optional, pre-defined list of allowed types for generated rubrics. + If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""" + + +RubricGenerationSpecOrDict = Union[RubricGenerationSpec, RubricGenerationSpecDict] + + +class RubricGenerationConfig(_common.BaseModel): + """Config for generating rubrics.""" + + http_options: Optional[HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + + +class RubricGenerationConfigDict(TypedDict, total=False): + """Config for generating rubrics.""" + + http_options: Optional[HttpOptionsDict] + """Used to override HTTP request options.""" + + +RubricGenerationConfigOrDict = Union[RubricGenerationConfig, RubricGenerationConfigDict] + + +class _GenerateInstanceRubricsRequest(_common.BaseModel): + """Parameters for generating rubrics.""" + + contents: Optional[list[genai_types.Content]] = Field( + default=None, + description="""The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.""", + ) + rubric_generation_spec: Optional[RubricGenerationSpec] = Field( + default=None, + description="""Specification for how the rubrics should be generated.""", + ) + config: Optional[RubricGenerationConfig] = Field(default=None, description="""""") + + +class _GenerateInstanceRubricsRequestDict(TypedDict, total=False): + """Parameters for generating rubrics.""" + + contents: Optional[list[genai_types.Content]] + """The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.""" + + rubric_generation_spec: Optional[RubricGenerationSpecDict] + """Specification for how the rubrics should be generated.""" + + config: Optional[RubricGenerationConfigDict] + """""" + + +_GenerateInstanceRubricsRequestOrDict = Union[ + _GenerateInstanceRubricsRequest, _GenerateInstanceRubricsRequestDict +] + + +class RubricContentProperty(_common.BaseModel): + """Defines criteria based on a specific property.""" + + description: Optional[str] = Field( + default=None, + description="""Description of the property being evaluated. + Example: "The model's response is grammatically correct." """, + ) + + +class RubricContentPropertyDict(TypedDict, total=False): + """Defines criteria based on a specific property.""" + + description: Optional[str] + """Description of the property being evaluated. + Example: "The model's response is grammatically correct." 
""" + + +RubricContentPropertyOrDict = Union[RubricContentProperty, RubricContentPropertyDict] + + +class RubricContent(_common.BaseModel): + """Content of the rubric, defining the testable criteria.""" + + property: Optional[RubricContentProperty] = Field( + default=None, + description="""Evaluation criteria based on a specific property.""", + ) + + +class RubricContentDict(TypedDict, total=False): + """Content of the rubric, defining the testable criteria.""" + + property: Optional[RubricContentPropertyDict] + """Evaluation criteria based on a specific property.""" + + +RubricContentOrDict = Union[RubricContent, RubricContentDict] + + +class Rubric(_common.BaseModel): + """Message representing a single testable criterion for evaluation. + + One input prompt could have multiple rubrics. + """ + + rubric_id: Optional[str] = Field( + default=None, + description="""Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""", + ) + content: Optional[RubricContent] = Field( + default=None, + description="""Required. The actual testable criteria for the rubric.""", + ) + type: Optional[str] = Field( + default=None, + description="""Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. + It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""", + ) + importance: Optional[Importance] = Field( + default=None, + description="""Optional. The relative importance of this rubric.""", + ) + + +class RubricDict(TypedDict, total=False): + """Message representing a single testable criterion for evaluation. + + One input prompt could have multiple rubrics. + """ + + rubric_id: Optional[str] + """Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""" + + content: Optional[RubricContentDict] + """Required. The actual testable criteria for the rubric.""" + + type: Optional[str] + """Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. + It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""" + + importance: Optional[Importance] + """Optional. 
The relative importance of this rubric.""" + + +RubricOrDict = Union[Rubric, RubricDict] + + +class GenerateInstanceRubricsResponse(_common.BaseModel): + """Response for generating rubrics.""" + + generated_rubrics: Optional[list[Rubric]] = Field( + default=None, description="""A list of generated rubrics.""" + ) + + +class GenerateInstanceRubricsResponseDict(TypedDict, total=False): + """Response for generating rubrics.""" + + generated_rubrics: Optional[list[RubricDict]] + """A list of generated rubrics.""" + + +GenerateInstanceRubricsResponseOrDict = Union[ + GenerateInstanceRubricsResponse, GenerateInstanceRubricsResponseDict +] + + class OptimizeConfig(_common.BaseModel): """Config for Prompt Optimizer.""" From bcdf041747eebfd5db558830df8e8259b03045f4 Mon Sep 17 00:00:00 2001 From: Sara Robinson Date: Fri, 11 Jul 2025 07:09:19 -0700 Subject: [PATCH 02/13] chore: Gen AI SDK client - update types in Prompt Optimizer PiperOrigin-RevId: 781969281 --- ...est_prompt_optimizer_optimize_job_state.py | 1 + vertexai/_genai/prompt_optimizer.py | 36 +++++++++---------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py b/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py index 09bc5fc8e6..6be77a528a 100644 --- a/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py +++ b/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py @@ -44,6 +44,7 @@ def test_optimize(client): method="vapo", config=config, ) + assert isinstance(job, types.CustomJob) assert job.state == types.JobState.JOB_STATE_SUCCEEDED diff --git a/vertexai/_genai/prompt_optimizer.py b/vertexai/_genai/prompt_optimizer.py index 2b742ea069..b072a9974d 100644 --- a/vertexai/_genai/prompt_optimizer.py +++ b/vertexai/_genai/prompt_optimizer.py @@ -618,16 +618,14 @@ def optimize( } args = ["--%s=%s" % (k, v) for k, v in container_args.items()] worker_pool_specs = [ - { - "replica_count": 1, - "container_spec": { - "image_uri": container_uri, - "args": args, - }, - "machine_spec": { - "machine_type": "n1-standard-4", - }, - } + types.WorkerPoolSpec( + replica_count=1, + machine_spec=types.MachineSpec(machine_type="n1-standard-4"), + container_spec=types.ContainerSpec( + image_uri=container_uri, + args=args, + ), + ) ] service_account = _prompt_optimizer_utils._get_service_account(config) @@ -879,16 +877,14 @@ async def optimize( } args = ["--%s=%s" % (k, v) for k, v in container_args.items()] worker_pool_specs = [ - { - "replica_count": 1, - "container_spec": { - "image_uri": container_uri, - "args": args, - }, - "machine_spec": { - "machine_type": "n1-standard-4", - }, - } + types.WorkerPoolSpec( + replica_count=1, + machine_spec=types.MachineSpec(machine_type="n1-standard-4"), + container_spec=types.ContainerSpec( + image_uri=container_uri, + args=args, + ), + ) ] service_account = _prompt_optimizer_utils._get_service_account(config) From 22fa1fed98514fbca1892a1986c485e63fbbe752 Mon Sep 17 00:00:00 2001 From: Sara Robinson Date: Fri, 11 Jul 2025 08:05:46 -0700 Subject: [PATCH 03/13] chore: GenAI SDK client - Add more types coverage for Agent Engines replay tests PiperOrigin-RevId: 781984631 --- .../test_create_agent_engine_memory.py | 6 +++ .../test_delete_agent_engine_memory.py | 2 + .../test_generate_agent_engine_memories.py | 25 ++++++----- .../replays/test_get_agent_engine_memory.py | 3 ++ .../test_list_agent_engine_memories.py | 14 +++--- 
.../test_retrieve_agent_engine_memories.py | 26 ++++++++++- .../genai/replays/test_update_agent_engine.py | 44 +++++++++++++++++++ 7 files changed, 99 insertions(+), 21 deletions(-) create mode 100644 tests/unit/vertexai/genai/replays/test_update_agent_engine.py diff --git a/tests/unit/vertexai/genai/replays/test_create_agent_engine_memory.py b/tests/unit/vertexai/genai/replays/test_create_agent_engine_memory.py index c09e9ea78f..803e78d436 100644 --- a/tests/unit/vertexai/genai/replays/test_create_agent_engine_memory.py +++ b/tests/unit/vertexai/genai/replays/test_create_agent_engine_memory.py @@ -15,15 +15,21 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types def test_create_memory(client): agent_engine = client.agent_engines.create() + assert isinstance(agent_engine, types.AgentEngine) + assert isinstance(agent_engine.api_resource, types.ReasoningEngine) + operation = client.agent_engines.create_memory( name=agent_engine.api_resource.name, fact="memory_fact", scope={"user_id": "123"}, + config=types.AgentEngineMemoryConfig(display_name="my_memory_fact"), ) + assert isinstance(operation, types.AgentEngineMemoryOperation) assert operation.response.fact == "memory_fact" assert operation.response.scope == {"user_id": "123"} assert operation.response.name.startswith(agent_engine.api_resource.name) diff --git a/tests/unit/vertexai/genai/replays/test_delete_agent_engine_memory.py b/tests/unit/vertexai/genai/replays/test_delete_agent_engine_memory.py index de9cab787b..1e6a9f5782 100644 --- a/tests/unit/vertexai/genai/replays/test_delete_agent_engine_memory.py +++ b/tests/unit/vertexai/genai/replays/test_delete_agent_engine_memory.py @@ -15,6 +15,7 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types def test_delete_memory(client): @@ -26,6 +27,7 @@ def test_delete_memory(client): ) memory = operation.response operation = client.agent_engines.delete_memory(name=memory.name) + assert isinstance(operation, types.DeleteAgentEngineMemoryOperation) assert operation.name.startswith(memory.name + "/operations/") diff --git a/tests/unit/vertexai/genai/replays/test_generate_agent_engine_memories.py b/tests/unit/vertexai/genai/replays/test_generate_agent_engine_memories.py index 77d67a91bf..c25510d013 100644 --- a/tests/unit/vertexai/genai/replays/test_generate_agent_engine_memories.py +++ b/tests/unit/vertexai/genai/replays/test_generate_agent_engine_memories.py @@ -15,6 +15,7 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types def test_generate_memories(client): @@ -26,19 +27,21 @@ def test_generate_memories(client): ) client.agent_engines.generate_memories( name=agent_engine.api_resource.name, - direct_contents_source={ - "events": [ - { - "content": { - "role": "model", - "parts": [ - {"text": "I am a software engineer focusing in security"} + scope={"user_id": "test-user-id"}, + direct_contents_source=types.GenerateMemoriesRequestDirectContentsSource( + events=[ + types.GenerateMemoriesRequestDirectContentsSourceEvent( + content=types.Content( + role="model", + parts=[ + types.Part( + text="I am a software engineer focusing in security" + ) ], - } - } + ) + ) ] - }, - scope={"user_id": "test-user-id"}, + ), ) assert ( len( diff 
--git a/tests/unit/vertexai/genai/replays/test_get_agent_engine_memory.py b/tests/unit/vertexai/genai/replays/test_get_agent_engine_memory.py index 855bd9d44d..f74aba4b77 100644 --- a/tests/unit/vertexai/genai/replays/test_get_agent_engine_memory.py +++ b/tests/unit/vertexai/genai/replays/test_get_agent_engine_memory.py @@ -15,6 +15,7 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types def test_get_memory(client): @@ -24,9 +25,11 @@ def test_get_memory(client): fact="memory_fact", scope={"user_id": "123"}, ) + assert isinstance(operation, types.AgentEngineMemoryOperation) memory = client.agent_engines.get_memory( name=operation.response.name, ) + assert isinstance(memory, types.Memory) assert memory.name == operation.response.name diff --git a/tests/unit/vertexai/genai/replays/test_list_agent_engine_memories.py b/tests/unit/vertexai/genai/replays/test_list_agent_engine_memories.py index c369f028cf..534f1c6f15 100644 --- a/tests/unit/vertexai/genai/replays/test_list_agent_engine_memories.py +++ b/tests/unit/vertexai/genai/replays/test_list_agent_engine_memories.py @@ -15,6 +15,7 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types def test_list_memories(client): @@ -29,16 +30,11 @@ def test_list_memories(client): fact="memory_fact", scope={"user_id": "123"}, ) - assert ( - len( - list( - client.agent_engines.list_memories( - name=agent_engine.api_resource.name, - ) - ) - ) - == 1 + memory_list = client.agent_engines.list_memories( + name=agent_engine.api_resource.name, ) + assert len(memory_list) == 1 + assert isinstance(memory_list[0], types.Memory) pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/replays/test_retrieve_agent_engine_memories.py b/tests/unit/vertexai/genai/replays/test_retrieve_agent_engine_memories.py index 5a5d163724..a35e764aec 100644 --- a/tests/unit/vertexai/genai/replays/test_retrieve_agent_engine_memories.py +++ b/tests/unit/vertexai/genai/replays/test_retrieve_agent_engine_memories.py @@ -15,14 +15,19 @@ # pylint: disable=protected-access,bad-continuation,missing-function-docstring from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types +from google.genai import pagers -def test_retrieve_memories(client): +def test_retrieve_memories_with_similarity_search_params(client): agent_engine = client.agent_engines.create() assert not list( client.agent_engines.retrieve_memories( name=agent_engine.api_resource.name, scope={"user_id": "123"}, + similarity_search_params=types.RetrieveMemoriesRequestSimilaritySearchParams( + search_query="memory_fact_1", + ), ) ) client.agent_engines.create_memory( @@ -65,6 +70,25 @@ def test_retrieve_memories(client): ) +def test_retrieve_memories_with_simple_retrieval_params(client): + agent_engine = client.agent_engines.create() + client.agent_engines.create_memory( + name=agent_engine.api_resource.name, + fact="memory_fact_1", + scope={"user_id": "123"}, + ) + memories = client.agent_engines.retrieve_memories( + name=agent_engine.api_resource.name, + scope={"user_id": "123"}, + simple_retrieval_params=types.RetrieveMemoriesRequestSimpleRetrievalParams( + page_size=1, + ), + ) + assert isinstance(memories, pagers.Pager) + assert isinstance(memories.page[0], types.RetrieveMemoriesResponseRetrievedMemory) + assert memories.page_size == 1 
+ + pytestmark = pytest_helper.setup( file=__file__, globals_for_file=globals(), diff --git a/tests/unit/vertexai/genai/replays/test_update_agent_engine.py b/tests/unit/vertexai/genai/replays/test_update_agent_engine.py new file mode 100644 index 0000000000..edc71a112d --- /dev/null +++ b/tests/unit/vertexai/genai/replays/test_update_agent_engine.py @@ -0,0 +1,44 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + + +from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types + + +def test_agent_engines_update(client): + agent_engine = client.agent_engines.create() + assert agent_engine.api_resource.display_name is None + + updated_agent_engine = client.agent_engines.update( + name=agent_engine.api_resource.name, + config=types.AgentEngineConfig( + display_name="updated_display_name", + description="updated description", + ), + ) + assert isinstance(updated_agent_engine, types.AgentEngine) + assert updated_agent_engine.api_resource.name == agent_engine.api_resource.name + + assert updated_agent_engine.api_resource.display_name == "updated_display_name" + assert updated_agent_engine.api_resource.description == "updated description" + + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), + test_method="agent_engines.update", +) From 0d63396899894b705ab6dcef6e7d3627ee169dea Mon Sep 17 00:00:00 2001 From: Sara Robinson Date: Fri, 11 Jul 2025 11:52:27 -0700 Subject: [PATCH 04/13] chore: GenAI SDK client - add more replay tests for eval PiperOrigin-RevId: 782058943 --- .../vertexai/genai/replays/test_evaluate.py | 61 ++++++++++++++++ .../genai/replays/test_evaluate_instances.py | 73 +++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 tests/unit/vertexai/genai/replays/test_evaluate.py diff --git a/tests/unit/vertexai/genai/replays/test_evaluate.py b/tests/unit/vertexai/genai/replays/test_evaluate.py new file mode 100644 index 0000000000..8eed522abc --- /dev/null +++ b/tests/unit/vertexai/genai/replays/test_evaluate.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + +from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types +import pandas as pd + + +def test_evaluation_result(client): + """Tests that evaluate() produces a correctly structured EvaluationResult.""" + prompts_df = pd.DataFrame({"prompt": ["What is Taylor Swift's most recent album?"]}) + + eval_dataset = client.evals.run_inference( + model="gemini-2.5-flash", + src=prompts_df, + ) + + metrics_to_run = [ + types.PrebuiltMetric.TEXT_QUALITY, + ] + + evaluation_result = client.evals.evaluate( + dataset=eval_dataset, + metrics=metrics_to_run, + ) + + assert isinstance(evaluation_result, types.EvaluationResult) + + assert evaluation_result.summary_metrics is not None + assert len(evaluation_result.summary_metrics) > 0 + for summary in evaluation_result.summary_metrics: + assert isinstance(summary, types.AggregatedMetricResult) + assert summary.metric_name is not None + assert summary.mean_score is not None + + assert evaluation_result.eval_case_results is not None + assert len(evaluation_result.eval_case_results) > 0 + for case_result in evaluation_result.eval_case_results: + assert isinstance(case_result, types.EvalCaseResult) + assert case_result.eval_case_index is not None + assert case_result.response_candidate_results is not None + + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), + test_method="evals.evaluate", +) diff --git a/tests/unit/vertexai/genai/replays/test_evaluate_instances.py b/tests/unit/vertexai/genai/replays/test_evaluate_instances.py index 8308450b8e..c3a78654a3 100644 --- a/tests/unit/vertexai/genai/replays/test_evaluate_instances.py +++ b/tests/unit/vertexai/genai/replays/test_evaluate_instances.py @@ -18,6 +18,7 @@ from tests.unit.vertexai.genai.replays import pytest_helper from vertexai._genai import types import pandas as pd +import json def test_bleu_metric(client): @@ -34,6 +35,78 @@ def test_bleu_metric(client): assert len(response.bleu_results.bleu_metric_values) == 1 +def test_exact_match_metric(client): + """Tests the _evaluate_instances method with ExactMatchInput.""" + test_exact_match_input = types.ExactMatchInput( + instances=[ + types.ExactMatchInstance( + prediction="The quick brown fox jumps over the lazy dog.", + reference="The quick brown fox jumps over the lazy dog.", + ) + ], + metric_spec=types.ExactMatchSpec(), + ) + response = client.evals._evaluate_instances( + exact_match_input=test_exact_match_input + ) + assert len(response.exact_match_results.exact_match_metric_values) == 1 + + +def test_rouge_metric(client): + """Tests the _evaluate_instances method with RougeInput.""" + test_rouge_input = types.RougeInput( + instances=[ + types.RougeInstance( + prediction="A fast brown fox leaps over a lazy dog.", + reference="The quick brown fox jumps over the lazy dog.", + ) + ], + metric_spec=types.RougeSpec(rouge_type="rougeL"), + ) + response = client.evals._evaluate_instances(rouge_input=test_rouge_input) + assert len(response.rouge_results.rouge_metric_values) == 1 + + +def test_pointwise_metric(client): + """Tests the _evaluate_instances method with PointwiseMetricInput.""" + instance_dict = {"prompt": "What is the capital of France?", "response": "Paris"} + json_instance = json.dumps(instance_dict) + + test_input = types.PointwiseMetricInput( + instance=types.PointwiseMetricInstance(json_instance=json_instance), + metric_spec=types.PointwiseMetricSpec( + metric_prompt_template="Evaluate if the 
response '{response}' correctly answers the prompt '{prompt}'." + ), + ) + response = client.evals._evaluate_instances(pointwise_metric_input=test_input) + assert response.pointwise_metric_result is not None + assert response.pointwise_metric_result.score is not None + + +def test_pairwise_metric_with_autorater(client): + """Tests the _evaluate_instances method with PairwiseMetricInput and AutoraterConfig.""" + + instance_dict = { + "baseline_response": "Short summary.", + "candidate_response": "A longer, more detailed summary.", + } + json_instance = json.dumps(instance_dict) + + test_input = types.PairwiseMetricInput( + instance=types.PairwiseMetricInstance(json_instance=json_instance), + metric_spec=types.PairwiseMetricSpec( + metric_prompt_template="Which response is a better summary? Baseline: '{baseline_response}' or Candidate: '{candidate_response}'" + ), + ) + autorater_config = types.AutoraterConfig(sampling_count=2) + + response = client.evals._evaluate_instances( + pairwise_metric_input=test_input, autorater_config=autorater_config + ) + assert response.pairwise_metric_result is not None + assert response.pairwise_metric_result.pairwise_choice is not None + + def test_run_inference_with_string_model(client): test_df = pd.DataFrame({"prompt": ["test prompt"]}) From cee8d8b85f00efb259dbee5e2fa6d0cbed73e24c Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 11 Jul 2025 12:33:00 -0700 Subject: [PATCH 05/13] fix: GenAI SDK client(evals) - Handle optional pandas dependency in type hints PiperOrigin-RevId: 782072742 --- vertexai/_genai/types.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py index 5067131bbd..ab4ab5566a 100644 --- a/vertexai/_genai/types.py +++ b/vertexai/_genai/types.py @@ -62,12 +62,16 @@ def __getattr__(name: str) -> typing.Any: if typing.TYPE_CHECKING: import pandas as pd + + PandasDataFrame = pd.DataFrame else: - pd: typing.Type = Any try: import pandas as pd + + PandasDataFrame = pd.DataFrame except ImportError: pd = None + PandasDataFrame = Any if typing.TYPE_CHECKING: import yaml else: @@ -6366,7 +6370,7 @@ class EvaluationDataset(_common.BaseModel): eval_cases: Optional[list[EvalCase]] = Field( default=None, description="""The evaluation cases to be evaluated.""" ) - eval_dataset_df: Optional["pd.DataFrame"] = Field( + eval_dataset_df: Optional[PandasDataFrame] = Field( default=None, description="""The evaluation dataset in the form of a Pandas DataFrame.""", ) @@ -6408,7 +6412,7 @@ class EvaluationDatasetDict(TypedDict, total=False): eval_cases: Optional[list[EvalCaseDict]] """The evaluation cases to be evaluated.""" - eval_dataset_df: Optional["pd.DataFrame"] + eval_dataset_df: Optional[PandasDataFrame] """The evaluation dataset in the form of a Pandas DataFrame.""" candidate_name: Optional[str] From 43eee8de3a6cbcf5e74a1272565b5307e882d194 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:52:48 -0700 Subject: [PATCH 06/13] Copybara import of the project: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -- c32529dfa8507d589579900dac73fc31a390a0b3 by Owl Bot : feat: add API for Managed OSS Fine Tuning docs: A comment for field `model` in message `.google.cloud.aiplatform.v1beta1.TunedModel` is changed docs: A comment for field `learning_rate_multiplier` in message `.google.cloud.aiplatform.v1beta1.SupervisedHyperParameters` is changed docs: A comment for field 
`training_dataset_uri` in message `.google.cloud.aiplatform.v1beta1.SupervisedTuningSpec` is changed docs: A comment for field `validation_dataset_uri` in message `.google.cloud.aiplatform.v1beta1.SupervisedTuningSpec` is changed PiperOrigin-RevId: 782121247 Source-Link: https://github.com/googleapis/googleapis/commit/5680805cfb38a0d61c746ec591dbb882870f8893 Source-Link: https://github.com/googleapis/googleapis-gen/commit/687626560f50a90b04b5ca382ee711b0d00323f2 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjg3NjI2NTYwZjUwYTkwYjA0YjVjYTM4MmVlNzExYjBkMDAzMjNmMiJ9 feat: Vertex AI Model Garden custom model deploy Public Preview PiperOrigin-RevId: 782029721 Source-Link: https://github.com/googleapis/googleapis/commit/e21caa35b051604acef5d3297e8890da739a36aa Source-Link: https://github.com/googleapis/googleapis-gen/commit/2bc7a491ba8a9c946b583c9d500e8b8a42b18812 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMmJjN2E0OTFiYThhOWM5NDZiNTgzYzlkNTAwZThiOGE0MmIxODgxMiJ9 feat: Add Aggregation Output in EvaluateDataset Get Operation Response docs: Add constraints for AggregationMetric enum and default value for flip_enabled field in AutoraterConfig PiperOrigin-RevId: 781252306 Source-Link: https://github.com/googleapis/googleapis/commit/0cdaee64d46d284e4d54afe5b429379b4c6770fc Source-Link: https://github.com/googleapis/googleapis-gen/commit/8e46ada24bebdc67d234b2b39cbf1f8d24a89dc9 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiOGU0NmFkYTI0YmViZGM2N2QyMzRiMmIzOWNiZjFmOGQyNGE4OWRjOSJ9 feat: add flexstart option to v1beta1 feat: some comments changes in machine_resources.proto to v1beta1 docs: A comment for message `DedicatedResources` is changed docs: A comment for field `machine_spec` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed docs: A comment for field `min_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed docs: A comment for field `max_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed docs: A comment for field `required_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed docs: A comment for field `min_replica_count` in message `.google.cloud.aiplatform.v1beta1.AutomaticResources` is changed docs: A comment for field `max_replica_count` in message `.google.cloud.aiplatform.v1beta1.AutomaticResources` is changed docs: A comment for field `boot_disk_type` in message `.google.cloud.aiplatform.v1beta1.DiskSpec` is changed PiperOrigin-RevId: 781151401 Source-Link: https://github.com/googleapis/googleapis/commit/f74e4faac803407f30c7054b7717625231508363 Source-Link: https://github.com/googleapis/googleapis-gen/commit/e851822574acc70d1c7fececb1ea943f68eb8a14 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiZTg1MTgyMjU3NGFjYzcwZDFjN2ZlY2VjYjFlYTk0M2Y2OGViOGExNCJ9 feat: Expose task_unique_name in pipeline task details for pipeline rerun PiperOrigin-RevId: 780661695 Source-Link: https://github.com/googleapis/googleapis/commit/b6084d2a2e58d59053c101f8b2fce48a7e7217bf Source-Link: https://github.com/googleapis/googleapis-gen/commit/adf70b4fc7b939afb4775f3847aa75596cbd5e80 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiYWRmNzBiNGZjN2I5MzlhZmI0Nzc1ZjM4NDdhYTc1NTk2Y2JkNWU4MCJ9 -- fb5daaef486fe769a0af452e682728f8875565ac by Owl Bot : 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md 
COPYBARA_INTEGRATE_REVIEW=https://github.com/googleapis/python-aiplatform/pull/5517 from googleapis:owl-bot-copy 29ea1c45c75b8bd7356d2e305b775b1c5ca2de2f PiperOrigin-RevId: 782966012 --- .../cloud/aiplatform_v1/types/pipeline_job.py | 13 ++ google/cloud/aiplatform_v1beta1/__init__.py | 6 + .../services/migration_service/client.py | 18 +-- .../aiplatform_v1beta1/types/__init__.py | 6 + .../types/evaluation_service.py | 140 ++++++++++++++++-- .../types/machine_resources.py | 114 ++++++++------ .../types/model_garden_service.py | 34 +++++ .../aiplatform_v1beta1/types/tuning_job.py | 87 +++++++++-- mypy.ini | 23 +-- ...t_metadata_google.cloud.aiplatform.v1.json | 2 +- ...adata_google.cloud.aiplatform.v1beta1.json | 2 +- .../aiplatform_v1/test_pipeline_service.py | 2 + .../aiplatform_v1/test_schedule_service.py | 4 + .../test_deployment_resource_pool_service.py | 3 + .../test_endpoint_service.py | 28 ++-- .../test_gen_ai_tuning_service.py | 46 ++++++ .../test_index_endpoint_service.py | 15 ++ .../test_migration_service.py | 26 ++-- 18 files changed, 449 insertions(+), 120 deletions(-) diff --git a/google/cloud/aiplatform_v1/types/pipeline_job.py b/google/cloud/aiplatform_v1/types/pipeline_job.py index 906aaee190..00b573f669 100644 --- a/google/cloud/aiplatform_v1/types/pipeline_job.py +++ b/google/cloud/aiplatform_v1/types/pipeline_job.py @@ -450,6 +450,15 @@ class PipelineTaskDetail(proto.Message): outputs (MutableMapping[str, google.cloud.aiplatform_v1.types.PipelineTaskDetail.ArtifactList]): Output only. The runtime output artifacts of the task. + task_unique_name (str): + Output only. The unique name of a task. This field is used + by rerun pipeline job. Console UI and Vertex AI SDK will + support triggering pipeline job reruns. The name is + constructed by concatenating all the parent tasks name with + the task name. For example, if a task named "child_task" has + a parent task named "parent_task_1" and parent task 1 has a + parent task named "parent_task_2", the task unique name will + be "parent_task_2.parent_task_1.child_task". 
""" class State(proto.Enum): @@ -603,6 +612,10 @@ class ArtifactList(proto.Message): number=11, message=ArtifactList, ) + task_unique_name: str = proto.Field( + proto.STRING, + number=14, + ) class PipelineTaskExecutorDetail(proto.Message): diff --git a/google/cloud/aiplatform_v1beta1/__init__.py b/google/cloud/aiplatform_v1beta1/__init__.py index 7063a42649..0eb66b3235 100644 --- a/google/cloud/aiplatform_v1beta1/__init__.py +++ b/google/cloud/aiplatform_v1beta1/__init__.py @@ -260,6 +260,8 @@ from .types.evaluated_annotation import ErrorAnalysisAnnotation from .types.evaluated_annotation import EvaluatedAnnotation from .types.evaluated_annotation import EvaluatedAnnotationExplanation +from .types.evaluation_service import AggregationOutput +from .types.evaluation_service import AggregationResult from .types.evaluation_service import AutoraterConfig from .types.evaluation_service import BleuInput from .types.evaluation_service import BleuInstance @@ -731,6 +733,7 @@ from .types.machine_resources import BatchDedicatedResources from .types.machine_resources import DedicatedResources from .types.machine_resources import DiskSpec +from .types.machine_resources import FlexStart from .types.machine_resources import MachineSpec from .types.machine_resources import NfsMount from .types.machine_resources import PersistentDiskSpec @@ -1334,6 +1337,8 @@ "AddExecutionEventsRequest", "AddExecutionEventsResponse", "AddTrialMeasurementRequest", + "AggregationOutput", + "AggregationResult", "Annotation", "AnnotationSpec", "ApiAuth", @@ -1725,6 +1730,7 @@ "FilterSplit", "FindNeighborsRequest", "FindNeighborsResponse", + "FlexStart", "FluencyInput", "FluencyInstance", "FluencyResult", diff --git a/google/cloud/aiplatform_v1beta1/services/migration_service/client.py b/google/cloud/aiplatform_v1beta1/services/migration_service/client.py index 800fb62ce3..786e8ef90f 100644 --- a/google/cloud/aiplatform_v1beta1/services/migration_service/client.py +++ b/google/cloud/aiplatform_v1beta1/services/migration_service/client.py @@ -265,40 +265,40 @@ def parse_dataset_path(path: str) -> Dict[str, str]: @staticmethod def dataset_path( project: str, - location: str, dataset: str, ) -> str: """Returns a fully-qualified dataset string.""" - return "projects/{project}/locations/{location}/datasets/{dataset}".format( + return "projects/{project}/datasets/{dataset}".format( project=project, - location=location, dataset=dataset, ) @staticmethod def parse_dataset_path(path: str) -> Dict[str, str]: """Parses a dataset path into its component segments.""" - m = re.match( - r"^projects/(?P.+?)/locations/(?P.+?)/datasets/(?P.+?)$", - path, - ) + m = re.match(r"^projects/(?P.+?)/datasets/(?P.+?)$", path) return m.groupdict() if m else {} @staticmethod def dataset_path( project: str, + location: str, dataset: str, ) -> str: """Returns a fully-qualified dataset string.""" - return "projects/{project}/datasets/{dataset}".format( + return "projects/{project}/locations/{location}/datasets/{dataset}".format( project=project, + location=location, dataset=dataset, ) @staticmethod def parse_dataset_path(path: str) -> Dict[str, str]: """Parses a dataset path into its component segments.""" - m = re.match(r"^projects/(?P.+?)/datasets/(?P.+?)$", path) + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/datasets/(?P.+?)$", + path, + ) return m.groupdict() if m else {} @staticmethod diff --git a/google/cloud/aiplatform_v1beta1/types/__init__.py b/google/cloud/aiplatform_v1beta1/types/__init__.py index 20a15f05ca..d18a7d4a50 100644 --- 
a/google/cloud/aiplatform_v1beta1/types/__init__.py +++ b/google/cloud/aiplatform_v1beta1/types/__init__.py @@ -211,6 +211,8 @@ EvaluatedAnnotationExplanation, ) from .evaluation_service import ( + AggregationOutput, + AggregationResult, AutoraterConfig, BleuInput, BleuInstance, @@ -756,6 +758,7 @@ BatchDedicatedResources, DedicatedResources, DiskSpec, + FlexStart, MachineSpec, NfsMount, PersistentDiskSpec, @@ -1593,6 +1596,8 @@ "ErrorAnalysisAnnotation", "EvaluatedAnnotation", "EvaluatedAnnotationExplanation", + "AggregationOutput", + "AggregationResult", "AutoraterConfig", "BleuInput", "BleuInstance", @@ -2060,6 +2065,7 @@ "BatchDedicatedResources", "DedicatedResources", "DiskSpec", + "FlexStart", "MachineSpec", "NfsMount", "PersistentDiskSpec", diff --git a/google/cloud/aiplatform_v1beta1/types/evaluation_service.py b/google/cloud/aiplatform_v1beta1/types/evaluation_service.py index 8a14cc4643..518d188b9b 100644 --- a/google/cloud/aiplatform_v1beta1/types/evaluation_service.py +++ b/google/cloud/aiplatform_v1beta1/types/evaluation_service.py @@ -31,6 +31,8 @@ "EvaluateDatasetOperationMetadata", "EvaluateDatasetResponse", "OutputInfo", + "AggregationOutput", + "AggregationResult", "EvaluateDatasetRequest", "OutputConfig", "Metric", @@ -228,11 +230,20 @@ class EvaluateDatasetResponse(proto.Message): r"""Response in LRO for EvaluationService.EvaluateDataset. Attributes: + aggregation_output (google.cloud.aiplatform_v1beta1.types.AggregationOutput): + Output only. Aggregation statistics derived + from results of + EvaluationService.EvaluateDataset. output_info (google.cloud.aiplatform_v1beta1.types.OutputInfo): Output only. Output info for EvaluationService.EvaluateDataset. """ + aggregation_output: "AggregationOutput" = proto.Field( + proto.MESSAGE, + number=1, + message="AggregationOutput", + ) output_info: "OutputInfo" = proto.Field( proto.MESSAGE, number=3, @@ -264,6 +275,102 @@ class OutputInfo(proto.Message): ) +class AggregationOutput(proto.Message): + r"""The aggregation result for the entire dataset and all + metrics. + + Attributes: + dataset (google.cloud.aiplatform_v1beta1.types.EvaluationDataset): + The dataset used for evaluation & + aggregation. + aggregation_results (MutableSequence[google.cloud.aiplatform_v1beta1.types.AggregationResult]): + One AggregationResult per metric. + """ + + dataset: "EvaluationDataset" = proto.Field( + proto.MESSAGE, + number=1, + message="EvaluationDataset", + ) + aggregation_results: MutableSequence["AggregationResult"] = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="AggregationResult", + ) + + +class AggregationResult(proto.Message): + r"""The aggregation result for a single metric. + + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + + Attributes: + pointwise_metric_result (google.cloud.aiplatform_v1beta1.types.PointwiseMetricResult): + Result for pointwise metric. + + This field is a member of `oneof`_ ``aggregation_result``. + pairwise_metric_result (google.cloud.aiplatform_v1beta1.types.PairwiseMetricResult): + Result for pairwise metric. + + This field is a member of `oneof`_ ``aggregation_result``. + exact_match_metric_value (google.cloud.aiplatform_v1beta1.types.ExactMatchMetricValue): + Results for exact match metric. 
+ + This field is a member of `oneof`_ ``aggregation_result``. + bleu_metric_value (google.cloud.aiplatform_v1beta1.types.BleuMetricValue): + Results for bleu metric. + + This field is a member of `oneof`_ ``aggregation_result``. + rouge_metric_value (google.cloud.aiplatform_v1beta1.types.RougeMetricValue): + Results for rouge metric. + + This field is a member of `oneof`_ ``aggregation_result``. + aggregation_metric (google.cloud.aiplatform_v1beta1.types.Metric.AggregationMetric): + Aggregation metric. + """ + + pointwise_metric_result: "PointwiseMetricResult" = proto.Field( + proto.MESSAGE, + number=5, + oneof="aggregation_result", + message="PointwiseMetricResult", + ) + pairwise_metric_result: "PairwiseMetricResult" = proto.Field( + proto.MESSAGE, + number=6, + oneof="aggregation_result", + message="PairwiseMetricResult", + ) + exact_match_metric_value: "ExactMatchMetricValue" = proto.Field( + proto.MESSAGE, + number=7, + oneof="aggregation_result", + message="ExactMatchMetricValue", + ) + bleu_metric_value: "BleuMetricValue" = proto.Field( + proto.MESSAGE, + number=8, + oneof="aggregation_result", + message="BleuMetricValue", + ) + rouge_metric_value: "RougeMetricValue" = proto.Field( + proto.MESSAGE, + number=9, + oneof="aggregation_result", + message="RougeMetricValue", + ) + aggregation_metric: "Metric.AggregationMetric" = proto.Field( + proto.ENUM, + number=4, + enum="Metric.AggregationMetric", + ) + + class EvaluateDatasetRequest(proto.Message): r"""Request message for EvaluationService.EvaluateDataset. @@ -373,25 +480,34 @@ class AggregationMetric(proto.Enum): AGGREGATION_METRIC_UNSPECIFIED (0): Unspecified aggregation metric. AVERAGE (1): - Average aggregation metric. + Average aggregation metric. Not supported for + Pairwise metric. MODE (2): Mode aggregation metric. STANDARD_DEVIATION (3): - Standard deviation aggregation metric. + Standard deviation aggregation metric. Not + supported for pairwise metric. VARIANCE (4): - Variance aggregation metric. + Variance aggregation metric. Not supported + for pairwise metric. MINIMUM (5): - Minimum aggregation metric. + Minimum aggregation metric. Not supported for + pairwise metric. MAXIMUM (6): - Maximum aggregation metric. + Maximum aggregation metric. Not supported for + pairwise metric. MEDIAN (7): - Median aggregation metric. + Median aggregation metric. Not supported for + pairwise metric. PERCENTILE_P90 (8): - 90th percentile aggregation metric. + 90th percentile aggregation metric. Not + supported for pairwise metric. PERCENTILE_P95 (9): - 95th percentile aggregation metric. + 95th percentile aggregation metric. Not + supported for pairwise metric. PERCENTILE_P99 (10): - 99th percentile aggregation metric. + 99th percentile aggregation metric. Not + supported for pairwise metric. """ AGGREGATION_METRIC_UNSPECIFIED = 0 AVERAGE = 1 @@ -494,9 +610,9 @@ class AutoraterConfig(proto.Message): This field is a member of `oneof`_ ``_sampling_count``. flip_enabled (bool): - Optional. Whether to flip the candidate and baseline - responses. This is only applicable to the pairwise metric. - If enabled, also provide + Optional. Default is true. Whether to flip the candidate and + baseline responses. This is only applicable to the pairwise + metric. If enabled, also provide PairwiseMetricSpec.candidate_response_field_name and PairwiseMetricSpec.baseline_response_field_name. 
When rendering PairwiseMetricSpec.metric_prompt_template, the diff --git a/google/cloud/aiplatform_v1beta1/types/machine_resources.py b/google/cloud/aiplatform_v1beta1/types/machine_resources.py index e20d354d67..0e252bbe53 100644 --- a/google/cloud/aiplatform_v1beta1/types/machine_resources.py +++ b/google/cloud/aiplatform_v1beta1/types/machine_resources.py @@ -25,6 +25,7 @@ from google.cloud.aiplatform_v1beta1.types import ( reservation_affinity as gca_reservation_affinity, ) +from google.protobuf import duration_pb2 # type: ignore __protobuf__ = proto.module( @@ -40,6 +41,7 @@ "NfsMount", "AutoscalingMetricSpec", "ShieldedVmConfig", + "FlexStart", }, ) @@ -115,33 +117,32 @@ class MachineSpec(proto.Message): class DedicatedResources(proto.Message): r"""A description of resources that are dedicated to a - DeployedModel, and that need a higher degree of manual - configuration. + DeployedModel or DeployedIndex, and that need a higher degree of + manual configuration. Attributes: machine_spec (google.cloud.aiplatform_v1beta1.types.MachineSpec): Required. Immutable. The specification of a - single machine used by the prediction. + single machine being used. min_replica_count (int): Required. Immutable. The minimum number of - machine replicas this DeployedModel will be - always deployed on. This value must be greater - than or equal to 1. - - If traffic against the DeployedModel increases, - it may dynamically be deployed onto more - replicas, and as traffic decreases, some of - these extra replicas may be freed. + machine replicas that will be always deployed + on. This value must be greater than or equal to + 1. + + If traffic increases, it may dynamically be + deployed onto more replicas, and as traffic + decreases, some of these extra replicas may be + freed. max_replica_count (int): - Immutable. The maximum number of replicas this DeployedModel - may be deployed on when the traffic against it increases. If - the requested value is too large, the deployment will error, - but if deployment succeeds then the ability to scale the - model to that many replicas is guaranteed (barring service - outages). If traffic against the DeployedModel increases - beyond what its replicas at maximum may handle, a portion of - the traffic will be dropped. If this value is not provided, - will use + Immutable. The maximum number of replicas that may be + deployed on when the traffic against it increases. If the + requested value is too large, the deployment will error, but + if deployment succeeds then the ability to scale to that + many replicas is guaranteed (barring service outages). If + traffic increases beyond what its replicas at maximum may + handle, a portion of the traffic will be dropped. If this + value is not provided, will use [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count] as the default value. @@ -153,8 +154,8 @@ class DedicatedResources(proto.Message): required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when - partial model deployment/mutation is desired. If set, the - model deploy/mutate operation will succeed once + partial deployment/mutation is desired. If set, the + deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count. @@ -189,6 +190,11 @@ class DedicatedResources(proto.Message): spot (bool): Optional. 
If true, schedule the deployment workload on `spot VMs `__. + flex_start (google.cloud.aiplatform_v1beta1.types.FlexStart): + Optional. Immutable. If set, use DWS resource + to schedule the deployment workload. reference: + + (https://cloud.google.com/blog/products/compute/introducing-dynamic-workload-scheduler) """ machine_spec: "MachineSpec" = proto.Field( @@ -219,6 +225,11 @@ class DedicatedResources(proto.Message): proto.BOOL, number=5, ) + flex_start: "FlexStart" = proto.Field( + proto.MESSAGE, + number=10, + message="FlexStart", + ) class AutomaticResources(proto.Message): @@ -229,28 +240,27 @@ class AutomaticResources(proto.Message): Attributes: min_replica_count (int): - Immutable. The minimum number of replicas this DeployedModel - will be always deployed on. If traffic against it increases, - it may dynamically be deployed onto more replicas up to + Immutable. The minimum number of replicas that will be + always deployed on. If traffic against it increases, it may + dynamically be deployed onto more replicas up to [max_replica_count][google.cloud.aiplatform.v1beta1.AutomaticResources.max_replica_count], and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error. max_replica_count (int): Immutable. The maximum number of replicas - this DeployedModel may be deployed on when the - traffic against it increases. If the requested - value is too large, the deployment will error, - but if deployment succeeds then the ability to - scale the model to that many replicas is - guaranteed (barring service outages). If traffic - against the DeployedModel increases beyond what - its replicas at maximum may handle, a portion of - the traffic will be dropped. If this value is - not provided, a no upper bound for scaling under - heavy traffic will be assume, though Vertex AI - may be unable to scale beyond certain replica - number. + that may be deployed on when the traffic against + it increases. If the requested value is too + large, the deployment will error, but if + deployment succeeds then the ability to scale to + that many replicas is guaranteed (barring + service outages). If traffic increases beyond + what its replicas at maximum may handle, a + portion of the traffic will be dropped. If this + value is not provided, a no upper bound for + scaling under heavy traffic will be assume, + though Vertex AI may be unable to scale beyond + certain replica number. """ min_replica_count: int = proto.Field( @@ -321,10 +331,12 @@ class DiskSpec(proto.Message): Attributes: boot_disk_type (str): - Type of the boot disk (default is "pd-ssd"). - Valid values: "pd-ssd" (Persistent Disk Solid - State Drive) or "pd-standard" (Persistent Disk - Hard Disk Drive). + Type of the boot disk. For non-A3U machines, + the default value is "pd-ssd", for A3U machines, + the default value is "hyperdisk-balanced". Valid + values: "pd-ssd" (Persistent Disk Solid State + Drive), "pd-standard" (Persistent Disk Hard Disk + Drive) or "hyperdisk-balanced". boot_disk_size_gb (int): Size in GB of the boot disk (default is 100GB). @@ -451,4 +463,22 @@ class ShieldedVmConfig(proto.Message): ) +class FlexStart(proto.Message): + r"""FlexStart is used to schedule the deployment workload on DWS + resource. It contains the max duration of the deployment. + + Attributes: + max_runtime_duration (google.protobuf.duration_pb2.Duration): + The max duration of the deployment is max_runtime_duration. + The deployment will be terminated after the duration. 
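For reference, a minimal usage sketch (illustrative only; the machine type and duration below are assumed placeholder values, not requirements) of how the new flex_start field could be populated with these aiplatform_v1beta1 types:

    # Hypothetical sketch for the new FlexStart / flex_start field.
    # Assumes the aiplatform_v1beta1 GAPIC types from this patch.
    from google.cloud.aiplatform_v1beta1 import types as aip_types
    from google.protobuf import duration_pb2

    dedicated_resources = aip_types.DedicatedResources(
        machine_spec=aip_types.MachineSpec(machine_type="a3-highgpu-8g"),  # placeholder machine type
        min_replica_count=1,
        max_replica_count=1,
        flex_start=aip_types.FlexStart(
            # Per the field documentation, the deployment is terminated after
            # this duration, which can be set up to 7 days.
            max_runtime_duration=duration_pb2.Duration(seconds=86400),
        ),
    )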
The + max_runtime_duration can be set up to 7 days. + """ + + max_runtime_duration: duration_pb2.Duration = proto.Field( + proto.MESSAGE, + number=1, + message=duration_pb2.Duration, + ) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/model_garden_service.py b/google/cloud/aiplatform_v1beta1/types/model_garden_service.py index 36da1c66c3..fcec2c3e47 100644 --- a/google/cloud/aiplatform_v1beta1/types/model_garden_service.py +++ b/google/cloud/aiplatform_v1beta1/types/model_garden_service.py @@ -249,6 +249,11 @@ class DeployRequest(proto.Message): The Hugging Face model to deploy. Format: Hugging Face model ID like ``google/gemma-2-2b-it``. + This field is a member of `oneof`_ ``artifacts``. + custom_model (google.cloud.aiplatform_v1beta1.types.DeployRequest.CustomModel): + The custom model to deploy from a Google + Cloud Storage URI. + This field is a member of `oneof`_ ``artifacts``. destination (str): Required. The resource name of the Location to deploy the @@ -268,6 +273,29 @@ class DeployRequest(proto.Message): config will be used. """ + class CustomModel(proto.Message): + r"""The custom model to deploy from model weights in a Google + Cloud Storage URI or Model Registry model. + + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + + Attributes: + gcs_uri (str): + Immutable. The Google Cloud Storage URI of + the custom model, storing weights and config + files (which can be used to infer the base + model). + + This field is a member of `oneof`_ ``model_source``. + """ + + gcs_uri: str = proto.Field( + proto.STRING, + number=2, + oneof="model_source", + ) + class ModelConfig(proto.Message): r"""The model config to use for the deployment. @@ -387,6 +415,12 @@ class DeployConfig(proto.Message): number=2, oneof="artifacts", ) + custom_model: CustomModel = proto.Field( + proto.MESSAGE, + number=3, + oneof="artifacts", + message=CustomModel, + ) destination: str = proto.Field( proto.STRING, number=4, diff --git a/google/cloud/aiplatform_v1beta1/types/tuning_job.py b/google/cloud/aiplatform_v1beta1/types/tuning_job.py index f6661202bb..64d6cc6e8f 100644 --- a/google/cloud/aiplatform_v1beta1/types/tuning_job.py +++ b/google/cloud/aiplatform_v1beta1/types/tuning_job.py @@ -90,6 +90,15 @@ class TuningJob(proto.Message): description (str): Optional. The description of the [TuningJob][google.cloud.aiplatform.v1.TuningJob]. + custom_base_model (str): + Optional. The user-provided path to custom model weights. + Set this field to tune a custom model. The path must be a + Cloud Storage directory that contains the model weights in + .safetensors format along with associated model metadata + files. If this field is set, the base_model field must still + be set to indicate which base model the custom model is + derived from. This feature is only available for open source + models. state (google.cloud.aiplatform_v1beta1.types.JobState): Output only. The detailed state of the job. create_time (google.protobuf.timestamp_pb2.Timestamp): @@ -154,6 +163,11 @@ class TuningJob(proto.Message): Users starting the pipeline must have the ``iam.serviceAccounts.actAs`` permission on this service account. + output_uri (str): + Optional. Cloud Storage path to the directory + where tuning job outputs are written to. This + field is only available and required for open + source models. 
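As a rough illustration of the new open-source tuning fields (custom_base_model and output_uri), a sketch using the generated types; the Cloud Storage paths and base model name are assumed placeholders:

    # Hypothetical sketch only: a TuningJob that tunes a custom open-source
    # model from Cloud Storage weights, using the fields added in this patch.
    from google.cloud.aiplatform_v1beta1 import types as aip_types

    tuning_job = aip_types.TuningJob(
        base_model="open-source-base-model",  # still required to identify the derived-from base model
        custom_base_model="gs://my-bucket/custom-weights/",  # directory with .safetensors weights + metadata
        output_uri="gs://my-bucket/tuning-output/",  # required for open source models
        supervised_tuning_spec=aip_types.SupervisedTuningSpec(
            training_dataset_uri="gs://my-bucket/train.jsonl",
        ),
    )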
""" base_model: str = proto.Field( @@ -191,6 +205,10 @@ class TuningJob(proto.Message): proto.STRING, number=3, ) + custom_base_model: str = proto.Field( + proto.STRING, + number=26, + ) state: job_state.JobState = proto.Field( proto.ENUM, number=6, @@ -253,6 +271,10 @@ class TuningJob(proto.Message): proto.STRING, number=22, ) + output_uri: str = proto.Field( + proto.STRING, + number=25, + ) class TunedModel(proto.Message): @@ -262,7 +284,14 @@ class TunedModel(proto.Message): Attributes: model (str): Output only. The resource name of the TunedModel. Format: - ``projects/{project}/locations/{location}/models/{model}``. + + ``projects/{project}/locations/{location}/models/{model}@{version_id}`` + + When tuning from a base model, the version_id will be 1. + + For continuous tuning, the version id will be incremented by + 1 from the last version id in the parent model. E.g., + ``projects/{project}/locations/{location}/models/{model}@{last_version_id + 1}`` endpoint (str): Output only. A resource name of an Endpoint. Format: ``projects/{project}/locations/{location}/endpoints/{endpoint}``. @@ -711,10 +740,18 @@ class SupervisedHyperParameters(proto.Message): makes over the entire training dataset during training. learning_rate_multiplier (float): - Optional. Multiplier for adjusting the - default learning rate. + Optional. Multiplier for adjusting the default learning + rate. Mutually exclusive with ``learning_rate``. + learning_rate (float): + Optional. Learning rate for tuning. Mutually exclusive with + ``learning_rate_multiplier``. This feature is only available + for open source models. adapter_size (google.cloud.aiplatform_v1beta1.types.SupervisedHyperParameters.AdapterSize): Optional. Adapter size for tuning. + batch_size (int): + Optional. Batch size for tuning. + This feature is only available for open source + models. """ class AdapterSize(proto.Enum): @@ -752,11 +789,19 @@ class AdapterSize(proto.Enum): proto.DOUBLE, number=2, ) + learning_rate: float = proto.Field( + proto.DOUBLE, + number=6, + ) adapter_size: AdapterSize = proto.Field( proto.ENUM, number=3, enum=AdapterSize, ) + batch_size: int = proto.Field( + proto.INT64, + number=5, + ) class SupervisedTuningSpec(proto.Message): @@ -764,13 +809,15 @@ class SupervisedTuningSpec(proto.Message): Attributes: training_dataset_uri (str): - Required. Cloud Storage path to file - containing training dataset for tuning. The - dataset must be formatted as a JSONL file. + Required. Training dataset used for tuning. + The dataset can be specified as either a Cloud + Storage path to a JSONL file or as the resource + name of a Vertex Multimodal Dataset. validation_dataset_uri (str): - Optional. Cloud Storage path to file - containing validation dataset for tuning. The - dataset must be formatted as a JSONL file. + Optional. Validation dataset used for tuning. + The dataset can be specified as either a Cloud + Storage path to a JSONL file or as the resource + name of a Vertex Multimodal Dataset. hyper_parameters (google.cloud.aiplatform_v1beta1.types.SupervisedHyperParameters): Optional. Hyperparameters for SFT. export_last_checkpoint_only (bool): @@ -779,8 +826,25 @@ class SupervisedTuningSpec(proto.Message): last checkpoint will be exported. Otherwise, enable intermediate checkpoints for SFT. Default is false. + tuning_mode (google.cloud.aiplatform_v1beta1.types.SupervisedTuningSpec.TuningMode): + Tuning mode. """ + class TuningMode(proto.Enum): + r"""Supported tuning modes. 
+ + Values: + TUNING_MODE_UNSPECIFIED (0): + Tuning mode is unspecified. + TUNING_MODE_FULL (1): + Full fine-tuning mode. + TUNING_MODE_PEFT_ADAPTER (2): + PEFT adapter tuning mode. + """ + TUNING_MODE_UNSPECIFIED = 0 + TUNING_MODE_FULL = 1 + TUNING_MODE_PEFT_ADAPTER = 2 + training_dataset_uri: str = proto.Field( proto.STRING, number=1, @@ -798,6 +862,11 @@ class SupervisedTuningSpec(proto.Message): proto.BOOL, number=6, ) + tuning_mode: TuningMode = proto.Field( + proto.ENUM, + number=7, + enum=TuningMode, + ) class DistillationSpec(proto.Message): diff --git a/mypy.ini b/mypy.ini index e392baeb3c..574c5aed39 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,22 +1,3 @@ [mypy] -# TODO(b/422425982): Fix arg-type errors -disable_error_code = import-not-found, import-untyped, arg-type - -# We only want to run mypy on _genai dir, ignore dependent modules -[mypy-vertexai.agent_engines.*] -ignore_errors = True - -[mypy-vertexai.preview.*] -ignore_errors = True - -[mypy-vertexai.generative_models.*] -ignore_errors = True - -[mypy-vertexai.prompts.*] -ignore_errors = True - -[mypy-vertexai.tuning.*] -ignore_errors = True - -[mypy-vertexai.caching.*] -ignore_errors = True +python_version = 3.7 +namespace_packages = True diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json index 5ab7ef67aa..8dc7f2091f 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.103.0" + "version": "0.1.0" }, "snippets": [ { diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json index c473f5ccd7..a789b54e19 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.103.0" + "version": "0.1.0" }, "snippets": [ { diff --git a/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py b/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py index c836376da3..718f9a62d2 100644 --- a/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py @@ -10061,6 +10061,7 @@ def test_create_pipeline_job_rest_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, @@ -13090,6 +13091,7 @@ async def test_create_pipeline_job_rest_asyncio_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, diff --git a/tests/unit/gapic/aiplatform_v1/test_schedule_service.py b/tests/unit/gapic/aiplatform_v1/test_schedule_service.py index b53d344d73..b87c09a511 100644 --- a/tests/unit/gapic/aiplatform_v1/test_schedule_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_schedule_service.py @@ -5681,6 +5681,7 @@ def test_create_schedule_rest_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, @@ -6707,6 +6708,7 @@ def test_update_schedule_rest_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, @@ -7923,6 +7925,7 @@ async def 
test_create_schedule_rest_asyncio_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, @@ -9049,6 +9052,7 @@ async def test_update_schedule_rest_asyncio_call_success(request_type): ], "inputs": {}, "outputs": {}, + "task_unique_name": "task_unique_name_value", } ], }, diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_deployment_resource_pool_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_deployment_resource_pool_service.py index 13ca387520..22aab8cbb5 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_deployment_resource_pool_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_deployment_resource_pool_service.py @@ -95,6 +95,7 @@ from google.iam.v1 import policy_pb2 # type: ignore from google.longrunning import operations_pb2 # type: ignore from google.oauth2 import service_account +from google.protobuf import duration_pb2 # type: ignore from google.protobuf import empty_pb2 # type: ignore from google.protobuf import field_mask_pb2 # type: ignore from google.protobuf import timestamp_pb2 # type: ignore @@ -6121,6 +6122,7 @@ def test_update_deployment_resource_pool_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": {"max_runtime_duration": {"seconds": 751, "nanos": 543}}, }, "encryption_spec": {"kms_key_name": "kms_key_name_value"}, "service_account": "service_account_value", @@ -7924,6 +7926,7 @@ async def test_update_deployment_resource_pool_rest_asyncio_call_success(request {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": {"max_runtime_duration": {"seconds": 751, "nanos": 543}}, }, "encryption_spec": {"kms_key_name": "kms_key_name_value"}, "service_account": "service_account_value", diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py index 3f93ab4496..7fad114c15 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py @@ -8220,6 +8220,9 @@ def test_create_endpoint_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "automatic_resources": { "min_replica_count": 1803, @@ -8349,9 +8352,7 @@ def test_create_endpoint_rest_call_success(request_type): }, "dedicated_endpoint_enabled": True, "dedicated_endpoint_dns": "dedicated_endpoint_dns_value", - "client_connection_config": { - "inference_timeout": {"seconds": 751, "nanos": 543} - }, + "client_connection_config": {"inference_timeout": {}}, "satisfies_pzs": True, "satisfies_pzi": True, "gen_ai_advanced_features_config": {"rag_config": {"enable_rag": True}}, @@ -8858,6 +8859,9 @@ def test_update_endpoint_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "automatic_resources": { "min_replica_count": 1803, @@ -8987,9 +8991,7 @@ def test_update_endpoint_rest_call_success(request_type): }, "dedicated_endpoint_enabled": True, "dedicated_endpoint_dns": "dedicated_endpoint_dns_value", - "client_connection_config": { - "inference_timeout": {"seconds": 751, "nanos": 543} - }, + "client_connection_config": {"inference_timeout": {}}, "satisfies_pzs": True, "satisfies_pzi": True, "gen_ai_advanced_features_config": {"rag_config": {"enable_rag": 
True}}, @@ -11014,6 +11016,9 @@ async def test_create_endpoint_rest_asyncio_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "automatic_resources": { "min_replica_count": 1803, @@ -11143,9 +11148,7 @@ async def test_create_endpoint_rest_asyncio_call_success(request_type): }, "dedicated_endpoint_enabled": True, "dedicated_endpoint_dns": "dedicated_endpoint_dns_value", - "client_connection_config": { - "inference_timeout": {"seconds": 751, "nanos": 543} - }, + "client_connection_config": {"inference_timeout": {}}, "satisfies_pzs": True, "satisfies_pzi": True, "gen_ai_advanced_features_config": {"rag_config": {"enable_rag": True}}, @@ -11703,6 +11706,9 @@ async def test_update_endpoint_rest_asyncio_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "automatic_resources": { "min_replica_count": 1803, @@ -11832,9 +11838,7 @@ async def test_update_endpoint_rest_asyncio_call_success(request_type): }, "dedicated_endpoint_enabled": True, "dedicated_endpoint_dns": "dedicated_endpoint_dns_value", - "client_connection_config": { - "inference_timeout": {"seconds": 751, "nanos": 543} - }, + "client_connection_config": {"inference_timeout": {}}, "satisfies_pzs": True, "satisfies_pzi": True, "gen_ai_advanced_features_config": {"rag_config": {"enable_rag": True}}, diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_gen_ai_tuning_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_gen_ai_tuning_service.py index 5c9bc7dc3b..53dcd32440 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_gen_ai_tuning_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_gen_ai_tuning_service.py @@ -1214,10 +1214,12 @@ def test_create_tuning_job(request_type, transport: str = "grpc"): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) response = client.create_tuning_job(request) @@ -1233,10 +1235,12 @@ def test_create_tuning_job(request_type, transport: str = "grpc"): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" def test_create_tuning_job_non_empty_request_with_auto_populated_field(): @@ -1372,10 +1376,12 @@ async def test_create_tuning_job_async( name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", ) ) response = await client.create_tuning_job(request) @@ -1391,10 +1397,12 @@ async def 
test_create_tuning_job_async( assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.asyncio @@ -1587,10 +1595,12 @@ def test_get_tuning_job(request_type, transport: str = "grpc"): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) response = client.get_tuning_job(request) @@ -1606,10 +1616,12 @@ def test_get_tuning_job(request_type, transport: str = "grpc"): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" def test_get_tuning_job_non_empty_request_with_auto_populated_field(): @@ -1739,10 +1751,12 @@ async def test_get_tuning_job_async( name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", ) ) response = await client.get_tuning_job(request) @@ -1758,10 +1772,12 @@ async def test_get_tuning_job_async( assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.asyncio @@ -4371,10 +4387,12 @@ async def test_create_tuning_job_empty_call_grpc_asyncio(): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", ) ) await client.create_tuning_job(request=None) @@ -4404,10 +4422,12 @@ async def test_get_tuning_job_empty_call_grpc_asyncio(): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", 
pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", ) ) await client.get_tuning_job(request=None) @@ -4553,9 +4573,12 @@ def test_create_tuning_job_rest_call_success(request_type): "hyper_parameters": { "epoch_count": 1175, "learning_rate_multiplier": 0.2561, + "learning_rate": 0.1371, "adapter_size": 1, + "batch_size": 1052, }, "export_last_checkpoint_only": True, + "tuning_mode": 1, }, "distillation_spec": { "base_teacher_model": "base_teacher_model_value", @@ -4578,6 +4601,7 @@ def test_create_tuning_job_rest_call_success(request_type): "name": "name_value", "tuned_model_display_name": "tuned_model_display_name_value", "description": "description_value", + "custom_base_model": "custom_base_model_value", "state": 1, "create_time": {"seconds": 751, "nanos": 543}, "start_time": {}, @@ -4701,6 +4725,7 @@ def test_create_tuning_job_rest_call_success(request_type): "pipeline_job": "pipeline_job_value", "encryption_spec": {"kms_key_name": "kms_key_name_value"}, "service_account": "service_account_value", + "output_uri": "output_uri_value", } # The version of a generated dependency at test runtime may differ from the version used during generation. # Delete any fields which are not present in the current runtime dependency @@ -4778,10 +4803,12 @@ def get_message_fields(field): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) @@ -4802,10 +4829,12 @@ def get_message_fields(field): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.parametrize("null_interceptor", [True, False]) @@ -4919,10 +4948,12 @@ def test_get_tuning_job_rest_call_success(request_type): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) @@ -4943,10 +4974,12 @@ def test_get_tuning_job_rest_call_success(request_type): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.parametrize("null_interceptor", [True, False]) @@ -6200,9 +6233,12 @@ async def test_create_tuning_job_rest_asyncio_call_success(request_type): 
"hyper_parameters": { "epoch_count": 1175, "learning_rate_multiplier": 0.2561, + "learning_rate": 0.1371, "adapter_size": 1, + "batch_size": 1052, }, "export_last_checkpoint_only": True, + "tuning_mode": 1, }, "distillation_spec": { "base_teacher_model": "base_teacher_model_value", @@ -6225,6 +6261,7 @@ async def test_create_tuning_job_rest_asyncio_call_success(request_type): "name": "name_value", "tuned_model_display_name": "tuned_model_display_name_value", "description": "description_value", + "custom_base_model": "custom_base_model_value", "state": 1, "create_time": {"seconds": 751, "nanos": 543}, "start_time": {}, @@ -6348,6 +6385,7 @@ async def test_create_tuning_job_rest_asyncio_call_success(request_type): "pipeline_job": "pipeline_job_value", "encryption_spec": {"kms_key_name": "kms_key_name_value"}, "service_account": "service_account_value", + "output_uri": "output_uri_value", } # The version of a generated dependency at test runtime may differ from the version used during generation. # Delete any fields which are not present in the current runtime dependency @@ -6425,10 +6463,12 @@ def get_message_fields(field): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) @@ -6451,10 +6491,12 @@ def get_message_fields(field): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.asyncio @@ -6582,10 +6624,12 @@ async def test_get_tuning_job_rest_asyncio_call_success(request_type): name="name_value", tuned_model_display_name="tuned_model_display_name_value", description="description_value", + custom_base_model="custom_base_model_value", state=job_state.JobState.JOB_STATE_QUEUED, experiment="experiment_value", pipeline_job="pipeline_job_value", service_account="service_account_value", + output_uri="output_uri_value", base_model="base_model_value", ) @@ -6608,10 +6652,12 @@ async def test_get_tuning_job_rest_asyncio_call_success(request_type): assert response.name == "name_value" assert response.tuned_model_display_name == "tuned_model_display_name_value" assert response.description == "description_value" + assert response.custom_base_model == "custom_base_model_value" assert response.state == job_state.JobState.JOB_STATE_QUEUED assert response.experiment == "experiment_value" assert response.pipeline_job == "pipeline_job_value" assert response.service_account == "service_account_value" + assert response.output_uri == "output_uri_value" @pytest.mark.asyncio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py index ad0195b913..572570c50f 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py @@ -89,6 +89,7 @@ from google.iam.v1 
import policy_pb2 # type: ignore from google.longrunning import operations_pb2 # type: ignore from google.oauth2 import service_account +from google.protobuf import duration_pb2 # type: ignore from google.protobuf import empty_pb2 # type: ignore from google.protobuf import field_mask_pb2 # type: ignore from google.protobuf import timestamp_pb2 # type: ignore @@ -6487,6 +6488,9 @@ def test_create_index_endpoint_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "enable_access_logging": True, "deployed_index_auth_config": { @@ -7067,6 +7071,9 @@ def test_update_index_endpoint_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "enable_access_logging": True, "deployed_index_auth_config": { @@ -7756,6 +7763,7 @@ def test_mutate_deployed_index_rest_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": {"max_runtime_duration": {"seconds": 751, "nanos": 543}}, }, "enable_access_logging": True, "deployed_index_auth_config": { @@ -8860,6 +8868,9 @@ async def test_create_index_endpoint_rest_asyncio_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "enable_access_logging": True, "deployed_index_auth_config": { @@ -9489,6 +9500,9 @@ async def test_update_index_endpoint_rest_asyncio_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": { + "max_runtime_duration": {"seconds": 751, "nanos": 543} + }, }, "enable_access_logging": True, "deployed_index_auth_config": { @@ -10244,6 +10258,7 @@ async def test_mutate_deployed_index_rest_asyncio_call_success(request_type): {"metric_name": "metric_name_value", "target": 647} ], "spot": True, + "flex_start": {"max_runtime_duration": {"seconds": 751, "nanos": 543}}, }, "enable_access_logging": True, "deployed_index_auth_config": { diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py index 574ddbdf19..1069dbb2b0 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py @@ -5426,22 +5426,19 @@ def test_parse_dataset_path(): def test_dataset_path(): project = "squid" - location = "clam" - dataset = "whelk" - expected = "projects/{project}/locations/{location}/datasets/{dataset}".format( + dataset = "clam" + expected = "projects/{project}/datasets/{dataset}".format( project=project, - location=location, dataset=dataset, ) - actual = MigrationServiceClient.dataset_path(project, location, dataset) + actual = MigrationServiceClient.dataset_path(project, dataset) assert expected == actual def test_parse_dataset_path(): expected = { - "project": "octopus", - "location": "oyster", - "dataset": "nudibranch", + "project": "whelk", + "dataset": "octopus", } path = MigrationServiceClient.dataset_path(**expected) @@ -5451,19 +5448,22 @@ def test_parse_dataset_path(): def test_dataset_path(): - project = "cuttlefish" - dataset = "mussel" - expected = "projects/{project}/datasets/{dataset}".format( + project = "oyster" + location = "nudibranch" + dataset = "cuttlefish" + expected = 
"projects/{project}/locations/{location}/datasets/{dataset}".format( project=project, + location=location, dataset=dataset, ) - actual = MigrationServiceClient.dataset_path(project, dataset) + actual = MigrationServiceClient.dataset_path(project, location, dataset) assert expected == actual def test_parse_dataset_path(): expected = { - "project": "winkle", + "project": "mussel", + "location": "winkle", "dataset": "nautilus", } path = MigrationServiceClient.dataset_path(**expected) From 6cc17586fe9a10008809f007b0702d7de7ff2434 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Mon, 14 Jul 2025 10:56:38 -0700 Subject: [PATCH 07/13] chore: Add filtering for experiment.list method PiperOrigin-RevId: 782967412 --- .../metadata/experiment_resources.py | 5 ++++ tests/system/aiplatform/test_experiments.py | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/google/cloud/aiplatform/metadata/experiment_resources.py b/google/cloud/aiplatform/metadata/experiment_resources.py index 0d1561eac3..c9490361a0 100644 --- a/google/cloud/aiplatform/metadata/experiment_resources.py +++ b/google/cloud/aiplatform/metadata/experiment_resources.py @@ -316,6 +316,7 @@ def get_or_create( def list( cls, *, + filter: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -327,6 +328,8 @@ def list( ``` Args: + filter (str): + Optional. A query to filter available resources for matching results. project (str): Optional. Project to list these experiments from. Overrides project set in aiplatform.init. @@ -343,6 +346,8 @@ def list( filter_str = metadata_utils._make_filter_string( schema_title=constants.SYSTEM_EXPERIMENT ) + if filter: + filter_str = f"{filter_str} AND ({filter})" with _SetLoggerLevel(resource): experiment_contexts = context.Context.list( diff --git a/tests/system/aiplatform/test_experiments.py b/tests/system/aiplatform/test_experiments.py index 9dbaf21886..9fe0d190a0 100644 --- a/tests/system/aiplatform/test_experiments.py +++ b/tests/system/aiplatform/test_experiments.py @@ -121,6 +121,35 @@ def test_get_run(self): assert run.name == _RUN assert run.state == aiplatform.gapic.Execution.State.RUNNING + def test_list_experiment(self): + experiments = aiplatform.Experiment.list( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + assert isinstance(experiments, list) + assert any( + experiment.name == self._experiment_name for experiment in experiments + ) + + def test_list_experiment_filter(self): + experiments = aiplatform.Experiment.list( + filter=f"display_name = {self._experiment_name}", + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + assert len(experiments) == 1 + assert any( + experiment.name == self._experiment_name for experiment in experiments + ) + + def test_list_experiment_filter_no_results(self): + experiments = aiplatform.Experiment.list( + filter="display_name = not_mathcing_filter_name", + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + assert len(experiments) == 0 + def test_log_params(self): aiplatform.init( project=e2e_base._PROJECT, From a38d3c8475bbda94b363a962a03bdac1325376bd Mon Sep 17 00:00:00 2001 From: Sara Robinson Date: Mon, 14 Jul 2025 14:11:40 -0700 Subject: [PATCH 08/13] chore: GenAI SDK client - Add async replay test for Prompt Optimizer PiperOrigin-RevId: 783037244 --- ...est_prompt_optimizer_optimize_job_state.py | 58 +++++++++++++++++-- vertexai/_genai/prompt_optimizer.py | 46 +++++++++------ 2 files changed, 80 
insertions(+), 24 deletions(-) diff --git a/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py b/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py index 6be77a528a..f320be970d 100644 --- a/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py +++ b/tests/unit/vertexai/genai/replays/test_prompt_optimizer_optimize_job_state.py @@ -18,13 +18,10 @@ from tests.unit.vertexai.genai.replays import pytest_helper from vertexai._genai import types +import pytest -# If you re-record this test, you will need to update the replay file to -# include the placeholder values for config path and service account -def test_optimize(client): - """Tests the optimize request parameters method.""" - +def _raise_for_unset_env_vars() -> None: if not os.environ.get("VAPO_CONFIG_PATH"): raise ValueError("VAPO_CONFIG_PATH environment variable is not set.") if not os.environ.get("VAPO_SERVICE_ACCOUNT_PROJECT_NUMBER"): @@ -32,6 +29,14 @@ def test_optimize(client): "VAPO_SERVICE_ACCOUNT_PROJECT_NUMBER " "environment variable is not set." ) + +# If you re-record this test, you will need to update the replay file to +# include the placeholder values for config path and service account +def test_optimize(client): + """Tests the optimize request parameters method.""" + + _raise_for_unset_env_vars() + config = types.PromptOptimizerVAPOConfig( config_path=os.environ.get("VAPO_CONFIG_PATH"), wait_for_completion=True, @@ -53,3 +58,46 @@ def test_optimize(client): globals_for_file=globals(), test_method="prompt_optimizer.optimize", ) + + +pytest_plugins = ("pytest_asyncio",) + + +@pytest.mark.asyncio +async def test_optimize_async(client): + _raise_for_unset_env_vars() + + config = types.PromptOptimizerVAPOConfig( + config_path=os.environ.get("VAPO_CONFIG_PATH"), + service_account_project_number=os.environ.get( + "VAPO_SERVICE_ACCOUNT_PROJECT_NUMBER" + ), + optimizer_job_display_name="optimizer_job_test", + ) + job = await client.aio.prompt_optimizer.optimize( + method="vapo", + config=config, + ) + assert isinstance(job, types.CustomJob) + assert job.state == types.JobState.JOB_STATE_PENDING + + +@pytest.mark.asyncio +async def test_optimize_async_with_config_wait_for_completion(client, caplog): + _raise_for_unset_env_vars() + + config = types.PromptOptimizerVAPOConfig( + config_path=os.environ.get("VAPO_CONFIG_PATH"), + service_account_project_number=os.environ.get( + "VAPO_SERVICE_ACCOUNT_PROJECT_NUMBER" + ), + optimizer_job_display_name="optimizer_job_test", + wait_for_completion=True, + ) + job = await client.aio.prompt_optimizer.optimize( + method="vapo", + config=config, + ) + assert isinstance(job, types.CustomJob) + assert job.state == types.JobState.JOB_STATE_PENDING + assert "Ignoring wait_for_completion=True" in caplog.text diff --git a/vertexai/_genai/prompt_optimizer.py b/vertexai/_genai/prompt_optimizer.py index b072a9974d..a2ea60e313 100644 --- a/vertexai/_genai/prompt_optimizer.py +++ b/vertexai/_genai/prompt_optimizer.py @@ -825,6 +825,7 @@ async def _get_custom_job( self._api_client._verify_response(return_value) return return_value + # Todo: b/428953357 - Add example in the README. async def optimize( self, method: str, @@ -832,25 +833,26 @@ async def optimize( ) -> types.CustomJob: """Call async Vertex AI Prompt Optimizer (VAPO). - # Todo: b/428953357 - Add example in the README. 
- Example usage: - client = vertexai.Client(project=PROJECT_NAME, location='us-central1') - vapo_config = vertexai.types.PromptOptimizerVAPOConfig( - config_path="gs://you-bucket-name/your-config.json", - service_account=service_account, - wait_for_completion=True - ) - job = await client.aio.prompt_optimizer.optimize( - method="vapo", config=vapo_config) - - Args: - method: The method for optimizing multiple prompts (currently only - vapo is supported). - config: PromptOptimizerVAPOConfig instance containing the - configuration for prompt optimization. - - Returns: - The custom job that was created. + Note: The `wait_for_completion` parameter in the config will be + ignored when using the AsyncClient, as it is not supported. + + Example usage: + client = vertexai.Client(project=PROJECT_NAME, location='us-central1') + vapo_config = vertexai.types.PromptOptimizerVAPOConfig( + config_path="gs://you-bucket-name/your-config.json", + service_account=service_account, + ) + job = await client.aio.prompt_optimizer.optimize( + method="vapo", config=vapo_config) + + Args: + method: The method for optimizing multiple prompts (currently only + vapo is supported). + config: PromptOptimizerVAPOConfig instance containing the + configuration for prompt optimization. + + Returns: + The custom job that was created. """ if method != "vapo": raise ValueError("Only vapo methods is currently supported.") @@ -858,6 +860,12 @@ async def optimize( if isinstance(config, dict): config = types.PromptOptimizerVAPOConfig(**config) + if config.wait_for_completion: + logger.info( + "Ignoring wait_for_completion=True since the AsyncClient does" + " not support it." + ) + if config.optimizer_job_display_name: display_name = config.optimizer_job_display_name else: From df2390e881b06629da29adb21a69c8cc68585aba Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Mon, 14 Jul 2025 15:11:24 -0700 Subject: [PATCH 09/13] feat: GenAI SDK client(evals) - add rubric-based evaluation types PiperOrigin-RevId: 783059072 --- vertexai/_genai/evals.py | 107 ++-- vertexai/_genai/types.py | 1310 +++++++++++++++++--------------------- 2 files changed, 656 insertions(+), 761 deletions(-) diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index d06087deed..cc5b217b04 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -649,6 +649,13 @@ def _EvaluateInstancesRequestParameters_to_vertex( ), ) + if getv(from_object, ["rubric_based_metric_input"]) is not None: + setv( + to_object, + ["rubricBasedMetricInput"], + getv(from_object, ["rubric_based_metric_input"]), + ) + if getv(from_object, ["autorater_config"]) is not None: setv( to_object, @@ -729,6 +736,13 @@ def _EvaluateInstancesResponse_from_vertex( ) -> dict[str, Any]: to_object: dict[str, Any] = {} + if getv(from_object, ["rubricBasedMetricResult"]) is not None: + setv( + to_object, + ["rubric_based_metric_result"], + getv(from_object, ["rubricBasedMetricResult"]), + ) + if getv(from_object, ["bleuResults"]) is not None: setv(to_object, ["bleu_results"], getv(from_object, ["bleuResults"])) @@ -762,20 +776,6 @@ def _EvaluateInstancesResponse_from_vertex( if getv(from_object, ["rougeResults"]) is not None: setv(to_object, ["rouge_results"], getv(from_object, ["rougeResults"])) - if getv(from_object, ["rubricBasedInstructionFollowingResult"]) is not None: - setv( - to_object, - ["rubric_based_instruction_following_result"], - getv(from_object, ["rubricBasedInstructionFollowingResult"]), - ) - - if getv(from_object, ["summarizationVerbosityResult"]) is not None: - 
setv( - to_object, - ["summarization_verbosity_result"], - getv(from_object, ["summarizationVerbosityResult"]), - ) - if getv(from_object, ["toolCallValidResults"]) is not None: setv( to_object, @@ -804,48 +804,58 @@ def _EvaluateInstancesResponse_from_vertex( getv(from_object, ["toolParameterKvMatchResults"]), ) - if getv(from_object, ["trajectoryAnyOrderMatchResults"]) is not None: - setv( - to_object, - ["trajectory_any_order_match_results"], - getv(from_object, ["trajectoryAnyOrderMatchResults"]), - ) + return to_object - if getv(from_object, ["trajectoryExactMatchResults"]) is not None: - setv( - to_object, - ["trajectory_exact_match_results"], - getv(from_object, ["trajectoryExactMatchResults"]), - ) - if getv(from_object, ["trajectoryInOrderMatchResults"]) is not None: - setv( - to_object, - ["trajectory_in_order_match_results"], - getv(from_object, ["trajectoryInOrderMatchResults"]), - ) +def _RubricContentProperty_from_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["description"]) is not None: + setv(to_object, ["description"], getv(from_object, ["description"])) + + return to_object - if getv(from_object, ["trajectoryPrecisionResults"]) is not None: - setv( - to_object, - ["trajectory_precision_results"], - getv(from_object, ["trajectoryPrecisionResults"]), - ) - if getv(from_object, ["trajectoryRecallResults"]) is not None: +def _RubricContent_from_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["property"]) is not None: setv( to_object, - ["trajectory_recall_results"], - getv(from_object, ["trajectoryRecallResults"]), + ["property"], + _RubricContentProperty_from_vertex( + getv(from_object, ["property"]), to_object + ), ) - if getv(from_object, ["trajectorySingleToolUseResults"]) is not None: + return to_object + + +def _Rubric_from_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["rubricId"]) is not None: + setv(to_object, ["rubric_id"], getv(from_object, ["rubricId"])) + + if getv(from_object, ["content"]) is not None: setv( to_object, - ["trajectory_single_tool_use_results"], - getv(from_object, ["trajectorySingleToolUseResults"]), + ["content"], + _RubricContent_from_vertex(getv(from_object, ["content"]), to_object), ) + if getv(from_object, ["type"]) is not None: + setv(to_object, ["type"], getv(from_object, ["type"])) + + if getv(from_object, ["importance"]) is not None: + setv(to_object, ["importance"], getv(from_object, ["importance"])) + return to_object @@ -858,7 +868,10 @@ def _GenerateInstanceRubricsResponse_from_vertex( setv( to_object, ["generated_rubrics"], - getv(from_object, ["generatedRubrics"]), + [ + _Rubric_from_vertex(item, to_object) + for item in getv(from_object, ["generatedRubrics"]) + ], ) return to_object @@ -881,6 +894,7 @@ def _evaluate_instances( tool_parameter_kv_match_input: Optional[ types.ToolParameterKVMatchInputOrDict ] = None, + rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None, autorater_config: Optional[types.AutoraterConfigOrDict] = None, config: Optional[types.EvaluateInstancesConfigOrDict] = None, ) -> types.EvaluateInstancesResponse: @@ -896,6 +910,7 @@ def _evaluate_instances( 
tool_name_match_input=tool_name_match_input, tool_parameter_key_match_input=tool_parameter_key_match_input, tool_parameter_kv_match_input=tool_parameter_kv_match_input, + rubric_based_metric_input=rubric_based_metric_input, autorater_config=autorater_config, config=config, ) @@ -1199,6 +1214,7 @@ async def _evaluate_instances( tool_parameter_kv_match_input: Optional[ types.ToolParameterKVMatchInputOrDict ] = None, + rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None, autorater_config: Optional[types.AutoraterConfigOrDict] = None, config: Optional[types.EvaluateInstancesConfigOrDict] = None, ) -> types.EvaluateInstancesResponse: @@ -1214,6 +1230,7 @@ async def _evaluate_instances( tool_name_match_input=tool_name_match_input, tool_parameter_key_match_input=tool_parameter_key_match_input, tool_parameter_kv_match_input=tool_parameter_kv_match_input, + rubric_based_metric_input=rubric_based_metric_input, autorater_config=autorater_config, config=config, ) diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py index ab4ab5566a..b2631b20c3 100644 --- a/vertexai/_genai/types.py +++ b/vertexai/_genai/types.py @@ -233,6 +233,19 @@ class RubricContentType(_common.CaseInSensitiveEnum): """Generate rubrics in a unit test format.""" +class Importance(_common.CaseInSensitiveEnum): + """Importance level of the rubric.""" + + IMPORTANCE_UNSPECIFIED = "IMPORTANCE_UNSPECIFIED" + """Importance is not specified.""" + HIGH = "HIGH" + """High importance.""" + MEDIUM = "MEDIUM" + """Medium importance.""" + LOW = "LOW" + """Low importance.""" + + class GenerateMemoriesResponseGeneratedMemoryAction(_common.CaseInSensitiveEnum): """The action to take.""" @@ -249,19 +262,6 @@ class GenerateMemoriesResponseGeneratedMemoryAction(_common.CaseInSensitiveEnum) """The memory was deleted.""" -class Importance(_common.CaseInSensitiveEnum): - """Importance level of the rubric.""" - - IMPORTANCE_UNSPECIFIED = "IMPORTANCE_UNSPECIFIED" - """Importance is not specified.""" - HIGH = "HIGH" - """High importance.""" - MEDIUM = "MEDIUM" - """Medium importance.""" - LOW = "LOW" - """Low importance.""" - - class BleuInstance(_common.BaseModel): """Bleu instance.""" @@ -1114,857 +1114,903 @@ class EvaluateInstancesConfigDict(TypedDict, total=False): ] -class _EvaluateInstancesRequestParameters(_common.BaseModel): - """Parameters for evaluating instances.""" +class RubricContentProperty(_common.BaseModel): + """Defines criteria based on a specific property.""" - bleu_input: Optional[BleuInput] = Field(default=None, description="""""") - exact_match_input: Optional[ExactMatchInput] = Field( - default=None, description="""""" - ) - rouge_input: Optional[RougeInput] = Field(default=None, description="""""") - pointwise_metric_input: Optional[PointwiseMetricInput] = Field( - default=None, description="""""" - ) - pairwise_metric_input: Optional[PairwiseMetricInput] = Field( - default=None, description="""""" - ) - tool_call_valid_input: Optional[ToolCallValidInput] = Field( - default=None, description="""""" - ) - tool_name_match_input: Optional[ToolNameMatchInput] = Field( - default=None, description="""""" - ) - tool_parameter_key_match_input: Optional[ToolParameterKeyMatchInput] = Field( - default=None, description="""""" - ) - tool_parameter_kv_match_input: Optional[ToolParameterKVMatchInput] = Field( - default=None, description="""""" - ) - autorater_config: Optional[AutoraterConfig] = Field( - default=None, description="""""" + description: Optional[str] = Field( + default=None, + 
description="""Description of the property being evaluated. + Example: "The model's response is grammatically correct." """, ) - config: Optional[EvaluateInstancesConfig] = Field(default=None, description="""""") -class _EvaluateInstancesRequestParametersDict(TypedDict, total=False): - """Parameters for evaluating instances.""" - - bleu_input: Optional[BleuInputDict] - """""" +class RubricContentPropertyDict(TypedDict, total=False): + """Defines criteria based on a specific property.""" - exact_match_input: Optional[ExactMatchInputDict] - """""" + description: Optional[str] + """Description of the property being evaluated. + Example: "The model's response is grammatically correct." """ - rouge_input: Optional[RougeInputDict] - """""" - pointwise_metric_input: Optional[PointwiseMetricInputDict] - """""" +RubricContentPropertyOrDict = Union[RubricContentProperty, RubricContentPropertyDict] - pairwise_metric_input: Optional[PairwiseMetricInputDict] - """""" - tool_call_valid_input: Optional[ToolCallValidInputDict] - """""" +class RubricContent(_common.BaseModel): + """Content of the rubric, defining the testable criteria.""" - tool_name_match_input: Optional[ToolNameMatchInputDict] - """""" + property: Optional[RubricContentProperty] = Field( + default=None, + description="""Evaluation criteria based on a specific property.""", + ) - tool_parameter_key_match_input: Optional[ToolParameterKeyMatchInputDict] - """""" - tool_parameter_kv_match_input: Optional[ToolParameterKVMatchInputDict] - """""" +class RubricContentDict(TypedDict, total=False): + """Content of the rubric, defining the testable criteria.""" - autorater_config: Optional[AutoraterConfigDict] - """""" + property: Optional[RubricContentPropertyDict] + """Evaluation criteria based on a specific property.""" - config: Optional[EvaluateInstancesConfigDict] - """""" +RubricContentOrDict = Union[RubricContent, RubricContentDict] -_EvaluateInstancesRequestParametersOrDict = Union[ - _EvaluateInstancesRequestParameters, _EvaluateInstancesRequestParametersDict -] +class Rubric(_common.BaseModel): + """Message representing a single testable criterion for evaluation. -class BleuMetricValue(_common.BaseModel): - """Bleu metric value for an instance.""" + One input prompt could have multiple rubrics. + """ - score: Optional[float] = Field( - default=None, description="""Output only. Bleu score.""" + rubric_id: Optional[str] = Field( + default=None, + description="""Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""", + ) + content: Optional[RubricContent] = Field( + default=None, + description="""Required. The actual testable criteria for the rubric.""", + ) + type: Optional[str] = Field( + default=None, + description="""Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. + It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""", + ) + importance: Optional[Importance] = Field( + default=None, + description="""Optional. The relative importance of this rubric.""", ) -class BleuMetricValueDict(TypedDict, total=False): - """Bleu metric value for an instance.""" - - score: Optional[float] - """Output only. Bleu score.""" +class RubricDict(TypedDict, total=False): + """Message representing a single testable criterion for evaluation. + One input prompt could have multiple rubrics. 
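For orientation, a minimal sketch of constructing one of these rubric objects with the new vertexai._genai types; the rubric description, type string, and importance are illustrative assumptions, not values mandated by the API:

    # Hypothetical sketch: building a single Rubric with the types added here.
    from vertexai._genai import types

    rubric = types.Rubric(
        rubric_id="rubric-001",
        content=types.RubricContent(
            property=types.RubricContentProperty(
                description="The response cites at least one source.",
            )
        ),
        type="GROUNDING_CITATION",
        importance=types.Importance.MEDIUM,
    )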
+ """ -BleuMetricValueOrDict = Union[BleuMetricValue, BleuMetricValueDict] + rubric_id: Optional[str] + """Required. Unique identifier for the rubric. + This ID is used to refer to this rubric, e.g., in RubricVerdict.""" + content: Optional[RubricContentDict] + """Required. The actual testable criteria for the rubric.""" -class BleuResults(_common.BaseModel): - """Results for bleu metric.""" + type: Optional[str] + """Optional. A type designator for the rubric, which can inform how it's + evaluated or interpreted by systems or users. + It's recommended to use consistent, well-defined, upper snake_case strings. + Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", + "INSTRUCTION_ADHERENCE".""" - bleu_metric_values: Optional[list[BleuMetricValue]] = Field( - default=None, description="""Output only. Bleu metric values.""" - ) + importance: Optional[Importance] + """Optional. The relative importance of this rubric.""" -class BleuResultsDict(TypedDict, total=False): - """Results for bleu metric.""" +RubricOrDict = Union[Rubric, RubricDict] - bleu_metric_values: Optional[list[BleuMetricValueDict]] - """Output only. Bleu metric values.""" +class RubricGenerationSpec(_common.BaseModel): + """Spec for generating rubrics.""" -BleuResultsOrDict = Union[BleuResults, BleuResultsDict] + prompt_template: Optional[str] = Field( + default=None, + description="""Template for the prompt used to generate rubrics. + The details should be updated based on the most-recent recipe requirements.""", + ) + generator_model_config: Optional[AutoraterConfig] = Field( + default=None, + description="""Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""", + ) + rubric_content_type: Optional[RubricContentType] = Field( + default=None, + description="""The type of rubric content to be generated.""", + ) + rubric_type_ontology: Optional[list[str]] = Field( + default=None, + description="""An optional, pre-defined list of allowed types for generated rubrics. + If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""", + ) -class CometResult(_common.BaseModel): - """Spec for Comet result - calculates the comet score for the given instance using the version specified in the spec.""" +class RubricGenerationSpecDict(TypedDict, total=False): + """Spec for generating rubrics.""" - score: Optional[float] = Field( - default=None, - description="""Output only. Comet score. Range depends on version.""", - ) + prompt_template: Optional[str] + """Template for the prompt used to generate rubrics. + The details should be updated based on the most-recent recipe requirements.""" + generator_model_config: Optional[AutoraterConfigDict] + """Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""" -class CometResultDict(TypedDict, total=False): - """Spec for Comet result - calculates the comet score for the given instance using the version specified in the spec.""" + rubric_content_type: Optional[RubricContentType] + """The type of rubric content to be generated.""" - score: Optional[float] - """Output only. Comet score. Range depends on version.""" + rubric_type_ontology: Optional[list[str]] + """An optional, pre-defined list of allowed types for generated rubrics. 
+ If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""" -CometResultOrDict = Union[CometResult, CometResultDict] +RubricGenerationSpecOrDict = Union[RubricGenerationSpec, RubricGenerationSpecDict] -class ExactMatchMetricValue(_common.BaseModel): - """Exact match metric value for an instance.""" +class RubricBasedMetricSpec(_common.BaseModel): + """Specification for a metric that is based on rubrics.""" - score: Optional[float] = Field( - default=None, description="""Output only. Exact match score.""" + metric_prompt_template: Optional[str] = Field( + default=None, + description="""Template for the prompt used by the judge model to evaluate against + rubrics.""", + ) + judge_autorater_config: Optional[AutoraterConfig] = Field( + default=None, + description="""Optional configuration for the judge LLM (Autorater).""", + ) + inline_rubrics: Optional[list[Rubric]] = Field( + default=None, + description="""Use rubrics provided directly in the spec.""", + ) + rubric_group_key: Optional[str] = Field( + default=None, + description="""Use a pre-defined group of rubrics associated with the input content. + This refers to a key in the `rubric_groups` map of + `RubricEnhancedContents`.""", + ) + rubric_generation_spec: Optional[RubricGenerationSpec] = Field( + default=None, + description="""Dynamically generate rubrics for evaluation using this specification.""", ) -class ExactMatchMetricValueDict(TypedDict, total=False): - """Exact match metric value for an instance.""" +class RubricBasedMetricSpecDict(TypedDict, total=False): + """Specification for a metric that is based on rubrics.""" - score: Optional[float] - """Output only. Exact match score.""" + metric_prompt_template: Optional[str] + """Template for the prompt used by the judge model to evaluate against + rubrics.""" + judge_autorater_config: Optional[AutoraterConfigDict] + """Optional configuration for the judge LLM (Autorater).""" -ExactMatchMetricValueOrDict = Union[ExactMatchMetricValue, ExactMatchMetricValueDict] + inline_rubrics: Optional[list[RubricDict]] + """Use rubrics provided directly in the spec.""" + rubric_group_key: Optional[str] + """Use a pre-defined group of rubrics associated with the input content. + This refers to a key in the `rubric_groups` map of + `RubricEnhancedContents`.""" -class ExactMatchResults(_common.BaseModel): - """Results for exact match metric.""" + rubric_generation_spec: Optional[RubricGenerationSpecDict] + """Dynamically generate rubrics for evaluation using this specification.""" - exact_match_metric_values: Optional[list[ExactMatchMetricValue]] = Field( - default=None, description="""Output only. Exact match metric values.""" + +RubricBasedMetricSpecOrDict = Union[RubricBasedMetricSpec, RubricBasedMetricSpecDict] + + +class ContentMap(_common.BaseModel): + """Map of placeholder in metric prompt template to contents of model input.""" + + values: Optional[dict[str, list[genai_types.Content]]] = Field( + default=None, description="""Map of placeholder to contents.""" ) -class ExactMatchResultsDict(TypedDict, total=False): - """Results for exact match metric.""" +class ContentMapDict(TypedDict, total=False): + """Map of placeholder in metric prompt template to contents of model input.""" - exact_match_metric_values: Optional[list[ExactMatchMetricValueDict]] - """Output only. 
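# A minimal sketch of how the rubric types above compose, assuming they are
# exposed as pydantic models via vertexai._genai.types (as in the tests in
# this patch); the rubric text and template below are illustrative placeholders.
from vertexai._genai import types

grammar_rubric = types.Rubric(
    rubric_id="00001",
    content=types.RubricContent(
        property=types.RubricContentProperty(
            description="The response is grammatically correct."
        )
    ),
    type="LANGUAGE:GRAMMAR",
    importance=types.Importance.HIGH,
)

# A spec can carry rubrics inline, point at a rubric group key, or request
# dynamic generation via rubric_generation_spec; inline rubrics are shown here.
metric_spec = types.RubricBasedMetricSpec(
    metric_prompt_template="Evaluate the response against each rubric.",
    inline_rubrics=[grammar_rubric],
)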
Exact match metric values.""" + values: Optional[dict[str, list[genai_types.Content]]] + """Map of placeholder to contents.""" -ExactMatchResultsOrDict = Union[ExactMatchResults, ExactMatchResultsDict] +ContentMapOrDict = Union[ContentMap, ContentMapDict] -class MetricxResult(_common.BaseModel): - """Spec for MetricX result - calculates the MetricX score for the given instance using the version specified in the spec.""" +class RubricEnhancedContents(_common.BaseModel): + """Rubric-enhanced contents for evaluation.""" - score: Optional[float] = Field( + prompt: Optional[list[genai_types.Content]] = Field( default=None, - description="""Output only. MetricX score. Range depends on version.""", + description="""User prompt, using the standard Content type from the Gen AI SDK.""", ) - - -class MetricxResultDict(TypedDict, total=False): - """Spec for MetricX result - calculates the MetricX score for the given instance using the version specified in the spec.""" - - score: Optional[float] - """Output only. MetricX score. Range depends on version.""" - - -MetricxResultOrDict = Union[MetricxResult, MetricxResultDict] - - -class RawOutput(_common.BaseModel): - """Raw output.""" - - raw_output: Optional[list[str]] = Field( - default=None, description="""Output only. Raw output string.""" + rubric_groups: Optional[dict[str, "RubricGroup"]] = Field( + default=None, + description="""Named groups of rubrics associated with this prompt. + The key is a user-defined name for the rubric group.""", + ) + response: Optional[list[genai_types.Content]] = Field( + default=None, + description="""Response, using the standard Content type from the Gen AI SDK.""", + ) + other_content: Optional[ContentMap] = Field( + default=None, + description="""Other contents needed for the metric. + For example, if `reference` is needed for the metric, it can be provided + here.""", ) -class RawOutputDict(TypedDict, total=False): - """Raw output.""" - - raw_output: Optional[list[str]] - """Output only. Raw output string.""" - - -RawOutputOrDict = Union[RawOutput, RawOutputDict] - - -class CustomOutput(_common.BaseModel): - """Spec for custom output.""" +class RubricEnhancedContentsDict(TypedDict, total=False): + """Rubric-enhanced contents for evaluation.""" - raw_outputs: Optional[RawOutput] = Field( - default=None, description="""Output only. List of raw output strings.""" - ) + prompt: Optional[list[genai_types.Content]] + """User prompt, using the standard Content type from the Gen AI SDK.""" + rubric_groups: Optional[dict[str, "RubricGroup"]] + """Named groups of rubrics associated with this prompt. + The key is a user-defined name for the rubric group.""" -class CustomOutputDict(TypedDict, total=False): - """Spec for custom output.""" + response: Optional[list[genai_types.Content]] + """Response, using the standard Content type from the Gen AI SDK.""" - raw_outputs: Optional[RawOutputDict] - """Output only. List of raw output strings.""" + other_content: Optional[ContentMapDict] + """Other contents needed for the metric. 
+ For example, if `reference` is needed for the metric, it can be provided + here.""" -CustomOutputOrDict = Union[CustomOutput, CustomOutputDict] +RubricEnhancedContentsOrDict = Union[RubricEnhancedContents, RubricEnhancedContentsDict] -class PairwiseMetricResult(_common.BaseModel): - """Spec for pairwise metric result.""" +class RubricBasedMetricInstance(_common.BaseModel): + """Defines an instance for Rubric-based metrics, allowing various input formats.""" - custom_output: Optional[CustomOutput] = Field( - default=None, description="""Output only. Spec for custom output.""" + json_instance: Optional[str] = Field( + default=None, + description="""Specify evaluation fields and their string values in JSON format.""", ) - explanation: Optional[str] = Field( + content_map_instance: Optional[ContentMap] = Field( default=None, - description="""Output only. Explanation for pairwise metric score.""", + description="""Specify evaluation fields and their content values using a ContentMap.""", ) - pairwise_choice: Optional[PairwiseChoice] = Field( - default=None, description="""Output only. Pairwise metric choice.""" + rubric_enhanced_contents: Optional[RubricEnhancedContents] = Field( + default=None, + description="""Provide input as Gemini Content along with one or more + associated rubric groups.""", ) -class PairwiseMetricResultDict(TypedDict, total=False): - """Spec for pairwise metric result.""" +class RubricBasedMetricInstanceDict(TypedDict, total=False): + """Defines an instance for Rubric-based metrics, allowing various input formats.""" - custom_output: Optional[CustomOutputDict] - """Output only. Spec for custom output.""" + json_instance: Optional[str] + """Specify evaluation fields and their string values in JSON format.""" - explanation: Optional[str] - """Output only. Explanation for pairwise metric score.""" + content_map_instance: Optional[ContentMapDict] + """Specify evaluation fields and their content values using a ContentMap.""" - pairwise_choice: Optional[PairwiseChoice] - """Output only. Pairwise metric choice.""" + rubric_enhanced_contents: Optional[RubricEnhancedContentsDict] + """Provide input as Gemini Content along with one or more + associated rubric groups.""" -PairwiseMetricResultOrDict = Union[PairwiseMetricResult, PairwiseMetricResultDict] +RubricBasedMetricInstanceOrDict = Union[ + RubricBasedMetricInstance, RubricBasedMetricInstanceDict +] -class PointwiseMetricResult(_common.BaseModel): - """Spec for pointwise metric result.""" +class RubricBasedMetricInput(_common.BaseModel): + """Input for a rubric-based metrics.""" - custom_output: Optional[CustomOutput] = Field( - default=None, description="""Output only. Spec for custom output.""" - ) - explanation: Optional[str] = Field( + metric_spec: Optional[RubricBasedMetricSpec] = Field( default=None, - description="""Output only. Explanation for pointwise metric score.""", + description="""Specification for the rubric-based metric.""", ) - score: Optional[float] = Field( - default=None, description="""Output only. Pointwise metric score.""" + instance: Optional[RubricBasedMetricInstance] = Field( + default=None, description="""The instance to be evaluated.""" ) -class PointwiseMetricResultDict(TypedDict, total=False): - """Spec for pointwise metric result.""" - - custom_output: Optional[CustomOutputDict] - """Output only. Spec for custom output.""" +class RubricBasedMetricInputDict(TypedDict, total=False): + """Input for a rubric-based metrics.""" - explanation: Optional[str] - """Output only. 
Explanation for pointwise metric score.""" + metric_spec: Optional[RubricBasedMetricSpecDict] + """Specification for the rubric-based metric.""" - score: Optional[float] - """Output only. Pointwise metric score.""" + instance: Optional[RubricBasedMetricInstanceDict] + """The instance to be evaluated.""" -PointwiseMetricResultOrDict = Union[PointwiseMetricResult, PointwiseMetricResultDict] +RubricBasedMetricInputOrDict = Union[RubricBasedMetricInput, RubricBasedMetricInputDict] -class RougeMetricValue(_common.BaseModel): - """Rouge metric value for an instance.""" +class _EvaluateInstancesRequestParameters(_common.BaseModel): + """Parameters for evaluating instances.""" - score: Optional[float] = Field( - default=None, description="""Output only. Rouge score.""" + bleu_input: Optional[BleuInput] = Field(default=None, description="""""") + exact_match_input: Optional[ExactMatchInput] = Field( + default=None, description="""""" ) + rouge_input: Optional[RougeInput] = Field(default=None, description="""""") + pointwise_metric_input: Optional[PointwiseMetricInput] = Field( + default=None, description="""""" + ) + pairwise_metric_input: Optional[PairwiseMetricInput] = Field( + default=None, description="""""" + ) + tool_call_valid_input: Optional[ToolCallValidInput] = Field( + default=None, description="""""" + ) + tool_name_match_input: Optional[ToolNameMatchInput] = Field( + default=None, description="""""" + ) + tool_parameter_key_match_input: Optional[ToolParameterKeyMatchInput] = Field( + default=None, description="""""" + ) + tool_parameter_kv_match_input: Optional[ToolParameterKVMatchInput] = Field( + default=None, description="""""" + ) + rubric_based_metric_input: Optional[RubricBasedMetricInput] = Field( + default=None, description="""""" + ) + autorater_config: Optional[AutoraterConfig] = Field( + default=None, description="""""" + ) + config: Optional[EvaluateInstancesConfig] = Field(default=None, description="""""") -class RougeMetricValueDict(TypedDict, total=False): - """Rouge metric value for an instance.""" +class _EvaluateInstancesRequestParametersDict(TypedDict, total=False): + """Parameters for evaluating instances.""" - score: Optional[float] - """Output only. Rouge score.""" + bleu_input: Optional[BleuInputDict] + """""" + exact_match_input: Optional[ExactMatchInputDict] + """""" -RougeMetricValueOrDict = Union[RougeMetricValue, RougeMetricValueDict] + rouge_input: Optional[RougeInputDict] + """""" + pointwise_metric_input: Optional[PointwiseMetricInputDict] + """""" -class RougeResults(_common.BaseModel): - """Results for rouge metric.""" + pairwise_metric_input: Optional[PairwiseMetricInputDict] + """""" - rouge_metric_values: Optional[list[RougeMetricValue]] = Field( - default=None, description="""Output only. Rouge metric values.""" - ) + tool_call_valid_input: Optional[ToolCallValidInputDict] + """""" + tool_name_match_input: Optional[ToolNameMatchInputDict] + """""" -class RougeResultsDict(TypedDict, total=False): - """Results for rouge metric.""" + tool_parameter_key_match_input: Optional[ToolParameterKeyMatchInputDict] + """""" - rouge_metric_values: Optional[list[RougeMetricValueDict]] - """Output only. 
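# A rough sketch of passing the new rubric_based_metric_input through the
# internal _evaluate_instances helper wired up earlier in this patch. The
# client construction and the JSON field names ("prompt", "response") are
# assumptions for illustration, not a documented contract.
import json

import vertexai
from vertexai._genai import types

client = vertexai.Client(project="my-project", location="us-central1")

metric_input = types.RubricBasedMetricInput(
    metric_spec=types.RubricBasedMetricSpec(
        metric_prompt_template="Evaluate the response against each rubric.",
        inline_rubrics=[types.Rubric(rubric_id="00001")],
    ),
    instance=types.RubricBasedMetricInstance(
        json_instance=json.dumps(
            {"prompt": "Summarize the article.", "response": "A short summary."}
        )
    ),
)

response = client.evals._evaluate_instances(
    rubric_based_metric_input=metric_input
)
print(response.rubric_based_metric_result)  # RubricBasedMetricResult or None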
Rouge metric values.""" + tool_parameter_kv_match_input: Optional[ToolParameterKVMatchInputDict] + """""" + rubric_based_metric_input: Optional[RubricBasedMetricInputDict] + """""" -RougeResultsOrDict = Union[RougeResults, RougeResultsDict] + autorater_config: Optional[AutoraterConfigDict] + """""" + config: Optional[EvaluateInstancesConfigDict] + """""" -class RubricCritiqueResult(_common.BaseModel): - """Rubric critique result.""" - rubric: Optional[str] = Field( - default=None, description="""Output only. Rubric to be evaluated.""" +_EvaluateInstancesRequestParametersOrDict = Union[ + _EvaluateInstancesRequestParameters, _EvaluateInstancesRequestParametersDict +] + + +class RubricVerdict(_common.BaseModel): + """Represents the verdict of an evaluation against a single rubric.""" + + evaluated_rubric: Optional[Rubric] = Field( + default=None, + description="""Required. The full rubric definition that was evaluated. + Storing this ensures the verdict is self-contained and understandable, + especially if the original rubric definition changes or was dynamically + generated.""", ) verdict: Optional[bool] = Field( default=None, - description="""Output only. Verdict for the rubric - true if the rubric is met, false otherwise.""", + description="""Required. Outcome of the evaluation against the rubric, represented as a + boolean. `true` indicates a "Pass", `false` indicates a "Fail".""", + ) + reasoning: Optional[str] = Field( + default=None, + description="""Optional. Human-readable reasoning or explanation for the verdict. + This can include specific examples or details from the evaluated content + that justify the given verdict.""", ) -class RubricCritiqueResultDict(TypedDict, total=False): - """Rubric critique result.""" +class RubricVerdictDict(TypedDict, total=False): + """Represents the verdict of an evaluation against a single rubric.""" - rubric: Optional[str] - """Output only. Rubric to be evaluated.""" + evaluated_rubric: Optional[RubricDict] + """Required. The full rubric definition that was evaluated. + Storing this ensures the verdict is self-contained and understandable, + especially if the original rubric definition changes or was dynamically + generated.""" verdict: Optional[bool] - """Output only. Verdict for the rubric - true if the rubric is met, false otherwise.""" + """Required. Outcome of the evaluation against the rubric, represented as a + boolean. `true` indicates a "Pass", `false` indicates a "Fail".""" + reasoning: Optional[str] + """Optional. Human-readable reasoning or explanation for the verdict. + This can include specific examples or details from the evaluated content + that justify the given verdict.""" -RubricCritiqueResultOrDict = Union[RubricCritiqueResult, RubricCritiqueResultDict] +RubricVerdictOrDict = Union[RubricVerdict, RubricVerdictDict] -class RubricBasedInstructionFollowingResult(_common.BaseModel): - """Result for RubricBasedInstructionFollowing metric.""" - rubric_critique_results: Optional[list[RubricCritiqueResult]] = Field( - default=None, - description="""Output only. List of per rubric critique results.""", - ) +class RubricBasedMetricResult(_common.BaseModel): + """Result for a rubric-based metric.""" + score: Optional[float] = Field( + default=None, description="""Passing rate of all the rubrics.""" + ) + rubric_verdicts: Optional[list[RubricVerdict]] = Field( default=None, - description="""Output only. 
Overall score for the instruction following.""", + description="""The details of all the rubrics and their verdicts.""", ) -class RubricBasedInstructionFollowingResultDict(TypedDict, total=False): - """Result for RubricBasedInstructionFollowing metric.""" - - rubric_critique_results: Optional[list[RubricCritiqueResultDict]] - """Output only. List of per rubric critique results.""" +class RubricBasedMetricResultDict(TypedDict, total=False): + """Result for a rubric-based metric.""" score: Optional[float] - """Output only. Overall score for the instruction following.""" + """Passing rate of all the rubrics.""" + + rubric_verdicts: Optional[list[RubricVerdictDict]] + """The details of all the rubrics and their verdicts.""" -RubricBasedInstructionFollowingResultOrDict = Union[ - RubricBasedInstructionFollowingResult, - RubricBasedInstructionFollowingResultDict, +RubricBasedMetricResultOrDict = Union[ + RubricBasedMetricResult, RubricBasedMetricResultDict ] -class SummarizationVerbosityResult(_common.BaseModel): - """Spec for summarization verbosity result.""" +class BleuMetricValue(_common.BaseModel): + """Bleu metric value for an instance.""" - confidence: Optional[float] = Field( - default=None, - description="""Output only. Confidence for summarization verbosity score.""", - ) - explanation: Optional[str] = Field( - default=None, - description="""Output only. Explanation for summarization verbosity score.""", - ) score: Optional[float] = Field( - default=None, - description="""Output only. Summarization Verbosity score.""", + default=None, description="""Output only. Bleu score.""" ) -class SummarizationVerbosityResultDict(TypedDict, total=False): - """Spec for summarization verbosity result.""" - - confidence: Optional[float] - """Output only. Confidence for summarization verbosity score.""" - - explanation: Optional[str] - """Output only. Explanation for summarization verbosity score.""" +class BleuMetricValueDict(TypedDict, total=False): + """Bleu metric value for an instance.""" score: Optional[float] - """Output only. Summarization Verbosity score.""" + """Output only. Bleu score.""" -SummarizationVerbosityResultOrDict = Union[ - SummarizationVerbosityResult, SummarizationVerbosityResultDict -] +BleuMetricValueOrDict = Union[BleuMetricValue, BleuMetricValueDict] -class ToolCallValidMetricValue(_common.BaseModel): - """Tool call valid metric value for an instance.""" +class BleuResults(_common.BaseModel): + """Results for bleu metric.""" - score: Optional[float] = Field( - default=None, description="""Output only. Tool call valid score.""" + bleu_metric_values: Optional[list[BleuMetricValue]] = Field( + default=None, description="""Output only. Bleu metric values.""" ) -class ToolCallValidMetricValueDict(TypedDict, total=False): - """Tool call valid metric value for an instance.""" +class BleuResultsDict(TypedDict, total=False): + """Results for bleu metric.""" - score: Optional[float] - """Output only. Tool call valid score.""" + bleu_metric_values: Optional[list[BleuMetricValueDict]] + """Output only. 
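# Only to make the field semantics above concrete: score is documented as the
# passing rate over rubric_verdicts, so a result shaped like the one below is
# produced by the evaluation service rather than assembled by callers; the
# values here are illustrative.
from vertexai._genai import types

verdicts = [
    types.RubricVerdict(
        evaluated_rubric=types.Rubric(rubric_id="00001"),
        verdict=True,
        reasoning="No grammatical errors were found.",
    ),
    types.RubricVerdict(
        evaluated_rubric=types.Rubric(rubric_id="00002"),
        verdict=False,
        reasoning="The requested closing was missing.",
    ),
]
pass_rate = sum(1 for v in verdicts if v.verdict) / len(verdicts)  # 0.5
result = types.RubricBasedMetricResult(score=pass_rate, rubric_verdicts=verdicts)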
Bleu metric values.""" -ToolCallValidMetricValueOrDict = Union[ - ToolCallValidMetricValue, ToolCallValidMetricValueDict -] +BleuResultsOrDict = Union[BleuResults, BleuResultsDict] -class ToolCallValidResults(_common.BaseModel): - """Results for tool call valid metric.""" +class CometResult(_common.BaseModel): + """Spec for Comet result - calculates the comet score for the given instance using the version specified in the spec.""" - tool_call_valid_metric_values: Optional[list[ToolCallValidMetricValue]] = Field( + score: Optional[float] = Field( default=None, - description="""Output only. Tool call valid metric values.""", + description="""Output only. Comet score. Range depends on version.""", ) -class ToolCallValidResultsDict(TypedDict, total=False): - """Results for tool call valid metric.""" +class CometResultDict(TypedDict, total=False): + """Spec for Comet result - calculates the comet score for the given instance using the version specified in the spec.""" - tool_call_valid_metric_values: Optional[list[ToolCallValidMetricValueDict]] - """Output only. Tool call valid metric values.""" + score: Optional[float] + """Output only. Comet score. Range depends on version.""" -ToolCallValidResultsOrDict = Union[ToolCallValidResults, ToolCallValidResultsDict] +CometResultOrDict = Union[CometResult, CometResultDict] -class ToolNameMatchMetricValue(_common.BaseModel): - """Tool name match metric value for an instance.""" +class ExactMatchMetricValue(_common.BaseModel): + """Exact match metric value for an instance.""" score: Optional[float] = Field( - default=None, description="""Output only. Tool name match score.""" + default=None, description="""Output only. Exact match score.""" ) -class ToolNameMatchMetricValueDict(TypedDict, total=False): - """Tool name match metric value for an instance.""" +class ExactMatchMetricValueDict(TypedDict, total=False): + """Exact match metric value for an instance.""" score: Optional[float] - """Output only. Tool name match score.""" + """Output only. Exact match score.""" -ToolNameMatchMetricValueOrDict = Union[ - ToolNameMatchMetricValue, ToolNameMatchMetricValueDict -] +ExactMatchMetricValueOrDict = Union[ExactMatchMetricValue, ExactMatchMetricValueDict] -class ToolNameMatchResults(_common.BaseModel): - """Results for tool name match metric.""" +class ExactMatchResults(_common.BaseModel): + """Results for exact match metric.""" - tool_name_match_metric_values: Optional[list[ToolNameMatchMetricValue]] = Field( - default=None, - description="""Output only. Tool name match metric values.""", + exact_match_metric_values: Optional[list[ExactMatchMetricValue]] = Field( + default=None, description="""Output only. Exact match metric values.""" ) -class ToolNameMatchResultsDict(TypedDict, total=False): - """Results for tool name match metric.""" +class ExactMatchResultsDict(TypedDict, total=False): + """Results for exact match metric.""" - tool_name_match_metric_values: Optional[list[ToolNameMatchMetricValueDict]] - """Output only. Tool name match metric values.""" + exact_match_metric_values: Optional[list[ExactMatchMetricValueDict]] + """Output only. 
Exact match metric values.""" -ToolNameMatchResultsOrDict = Union[ToolNameMatchResults, ToolNameMatchResultsDict] +ExactMatchResultsOrDict = Union[ExactMatchResults, ExactMatchResultsDict] -class ToolParameterKeyMatchMetricValue(_common.BaseModel): - """Tool parameter key match metric value for an instance.""" +class MetricxResult(_common.BaseModel): + """Spec for MetricX result - calculates the MetricX score for the given instance using the version specified in the spec.""" score: Optional[float] = Field( default=None, - description="""Output only. Tool parameter key match score.""", + description="""Output only. MetricX score. Range depends on version.""", ) -class ToolParameterKeyMatchMetricValueDict(TypedDict, total=False): - """Tool parameter key match metric value for an instance.""" +class MetricxResultDict(TypedDict, total=False): + """Spec for MetricX result - calculates the MetricX score for the given instance using the version specified in the spec.""" score: Optional[float] - """Output only. Tool parameter key match score.""" + """Output only. MetricX score. Range depends on version.""" -ToolParameterKeyMatchMetricValueOrDict = Union[ - ToolParameterKeyMatchMetricValue, ToolParameterKeyMatchMetricValueDict -] +MetricxResultOrDict = Union[MetricxResult, MetricxResultDict] -class ToolParameterKeyMatchResults(_common.BaseModel): - """Results for tool parameter key match metric.""" +class RawOutput(_common.BaseModel): + """Raw output.""" - tool_parameter_key_match_metric_values: Optional[ - list[ToolParameterKeyMatchMetricValue] - ] = Field( - default=None, - description="""Output only. Tool parameter key match metric values.""", + raw_output: Optional[list[str]] = Field( + default=None, description="""Output only. Raw output string.""" ) -class ToolParameterKeyMatchResultsDict(TypedDict, total=False): - """Results for tool parameter key match metric.""" +class RawOutputDict(TypedDict, total=False): + """Raw output.""" - tool_parameter_key_match_metric_values: Optional[ - list[ToolParameterKeyMatchMetricValueDict] - ] - """Output only. Tool parameter key match metric values.""" + raw_output: Optional[list[str]] + """Output only. Raw output string.""" -ToolParameterKeyMatchResultsOrDict = Union[ - ToolParameterKeyMatchResults, ToolParameterKeyMatchResultsDict -] +RawOutputOrDict = Union[RawOutput, RawOutputDict] -class ToolParameterKVMatchMetricValue(_common.BaseModel): - """Tool parameter key value match metric value for an instance.""" +class CustomOutput(_common.BaseModel): + """Spec for custom output.""" - score: Optional[float] = Field( - default=None, - description="""Output only. Tool parameter key value match score.""", + raw_outputs: Optional[RawOutput] = Field( + default=None, description="""Output only. List of raw output strings.""" ) -class ToolParameterKVMatchMetricValueDict(TypedDict, total=False): - """Tool parameter key value match metric value for an instance.""" +class CustomOutputDict(TypedDict, total=False): + """Spec for custom output.""" - score: Optional[float] - """Output only. Tool parameter key value match score.""" + raw_outputs: Optional[RawOutputDict] + """Output only. 
List of raw output strings.""" -ToolParameterKVMatchMetricValueOrDict = Union[ - ToolParameterKVMatchMetricValue, ToolParameterKVMatchMetricValueDict -] +CustomOutputOrDict = Union[CustomOutput, CustomOutputDict] -class ToolParameterKVMatchResults(_common.BaseModel): - """Results for tool parameter key value match metric.""" +class PairwiseMetricResult(_common.BaseModel): + """Spec for pairwise metric result.""" - tool_parameter_kv_match_metric_values: Optional[ - list[ToolParameterKVMatchMetricValue] - ] = Field( + custom_output: Optional[CustomOutput] = Field( + default=None, description="""Output only. Spec for custom output.""" + ) + explanation: Optional[str] = Field( default=None, - description="""Output only. Tool parameter key value match metric values.""", + description="""Output only. Explanation for pairwise metric score.""", + ) + pairwise_choice: Optional[PairwiseChoice] = Field( + default=None, description="""Output only. Pairwise metric choice.""" ) -class ToolParameterKVMatchResultsDict(TypedDict, total=False): - """Results for tool parameter key value match metric.""" - - tool_parameter_kv_match_metric_values: Optional[ - list[ToolParameterKVMatchMetricValueDict] - ] - """Output only. Tool parameter key value match metric values.""" - - -ToolParameterKVMatchResultsOrDict = Union[ - ToolParameterKVMatchResults, ToolParameterKVMatchResultsDict -] - - -class TrajectoryAnyOrderMatchMetricValue(_common.BaseModel): - """TrajectoryAnyOrderMatch metric value for an instance.""" - - score: Optional[float] = Field( - default=None, - description="""Output only. TrajectoryAnyOrderMatch score.""", - ) +class PairwiseMetricResultDict(TypedDict, total=False): + """Spec for pairwise metric result.""" + custom_output: Optional[CustomOutputDict] + """Output only. Spec for custom output.""" -class TrajectoryAnyOrderMatchMetricValueDict(TypedDict, total=False): - """TrajectoryAnyOrderMatch metric value for an instance.""" + explanation: Optional[str] + """Output only. Explanation for pairwise metric score.""" - score: Optional[float] - """Output only. TrajectoryAnyOrderMatch score.""" + pairwise_choice: Optional[PairwiseChoice] + """Output only. Pairwise metric choice.""" -TrajectoryAnyOrderMatchMetricValueOrDict = Union[ - TrajectoryAnyOrderMatchMetricValue, TrajectoryAnyOrderMatchMetricValueDict -] +PairwiseMetricResultOrDict = Union[PairwiseMetricResult, PairwiseMetricResultDict] -class TrajectoryAnyOrderMatchResults(_common.BaseModel): - """Results for TrajectoryAnyOrderMatch metric.""" +class PointwiseMetricResult(_common.BaseModel): + """Spec for pointwise metric result.""" - trajectory_any_order_match_metric_values: Optional[ - list[TrajectoryAnyOrderMatchMetricValue] - ] = Field( + custom_output: Optional[CustomOutput] = Field( + default=None, description="""Output only. Spec for custom output.""" + ) + explanation: Optional[str] = Field( default=None, - description="""Output only. TrajectoryAnyOrderMatch metric values.""", + description="""Output only. Explanation for pointwise metric score.""", + ) + score: Optional[float] = Field( + default=None, description="""Output only. Pointwise metric score.""" ) -class TrajectoryAnyOrderMatchResultsDict(TypedDict, total=False): - """Results for TrajectoryAnyOrderMatch metric.""" +class PointwiseMetricResultDict(TypedDict, total=False): + """Spec for pointwise metric result.""" - trajectory_any_order_match_metric_values: Optional[ - list[TrajectoryAnyOrderMatchMetricValueDict] - ] - """Output only. 
TrajectoryAnyOrderMatch metric values.""" + custom_output: Optional[CustomOutputDict] + """Output only. Spec for custom output.""" + explanation: Optional[str] + """Output only. Explanation for pointwise metric score.""" -TrajectoryAnyOrderMatchResultsOrDict = Union[ - TrajectoryAnyOrderMatchResults, TrajectoryAnyOrderMatchResultsDict -] + score: Optional[float] + """Output only. Pointwise metric score.""" -class TrajectoryExactMatchMetricValue(_common.BaseModel): - """TrajectoryExactMatch metric value for an instance.""" +PointwiseMetricResultOrDict = Union[PointwiseMetricResult, PointwiseMetricResultDict] + + +class RougeMetricValue(_common.BaseModel): + """Rouge metric value for an instance.""" score: Optional[float] = Field( - default=None, description="""Output only. TrajectoryExactMatch score.""" + default=None, description="""Output only. Rouge score.""" ) -class TrajectoryExactMatchMetricValueDict(TypedDict, total=False): - """TrajectoryExactMatch metric value for an instance.""" +class RougeMetricValueDict(TypedDict, total=False): + """Rouge metric value for an instance.""" score: Optional[float] - """Output only. TrajectoryExactMatch score.""" + """Output only. Rouge score.""" -TrajectoryExactMatchMetricValueOrDict = Union[ - TrajectoryExactMatchMetricValue, TrajectoryExactMatchMetricValueDict -] +RougeMetricValueOrDict = Union[RougeMetricValue, RougeMetricValueDict] -class TrajectoryExactMatchResults(_common.BaseModel): - """Results for TrajectoryExactMatch metric.""" +class RougeResults(_common.BaseModel): + """Results for rouge metric.""" - trajectory_exact_match_metric_values: Optional[ - list[TrajectoryExactMatchMetricValue] - ] = Field( - default=None, - description="""Output only. TrajectoryExactMatch metric values.""", + rouge_metric_values: Optional[list[RougeMetricValue]] = Field( + default=None, description="""Output only. Rouge metric values.""" ) -class TrajectoryExactMatchResultsDict(TypedDict, total=False): - """Results for TrajectoryExactMatch metric.""" +class RougeResultsDict(TypedDict, total=False): + """Results for rouge metric.""" - trajectory_exact_match_metric_values: Optional[ - list[TrajectoryExactMatchMetricValueDict] - ] - """Output only. TrajectoryExactMatch metric values.""" + rouge_metric_values: Optional[list[RougeMetricValueDict]] + """Output only. Rouge metric values.""" -TrajectoryExactMatchResultsOrDict = Union[ - TrajectoryExactMatchResults, TrajectoryExactMatchResultsDict -] +RougeResultsOrDict = Union[RougeResults, RougeResultsDict] -class TrajectoryInOrderMatchMetricValue(_common.BaseModel): - """TrajectoryInOrderMatch metric value for an instance.""" +class ToolCallValidMetricValue(_common.BaseModel): + """Tool call valid metric value for an instance.""" score: Optional[float] = Field( - default=None, - description="""Output only. TrajectoryInOrderMatch score.""", + default=None, description="""Output only. Tool call valid score.""" ) -class TrajectoryInOrderMatchMetricValueDict(TypedDict, total=False): - """TrajectoryInOrderMatch metric value for an instance.""" +class ToolCallValidMetricValueDict(TypedDict, total=False): + """Tool call valid metric value for an instance.""" score: Optional[float] - """Output only. TrajectoryInOrderMatch score.""" + """Output only. 
Tool call valid score.""" -TrajectoryInOrderMatchMetricValueOrDict = Union[ - TrajectoryInOrderMatchMetricValue, TrajectoryInOrderMatchMetricValueDict +ToolCallValidMetricValueOrDict = Union[ + ToolCallValidMetricValue, ToolCallValidMetricValueDict ] -class TrajectoryInOrderMatchResults(_common.BaseModel): - """Results for TrajectoryInOrderMatch metric.""" +class ToolCallValidResults(_common.BaseModel): + """Results for tool call valid metric.""" - trajectory_in_order_match_metric_values: Optional[ - list[TrajectoryInOrderMatchMetricValue] - ] = Field( + tool_call_valid_metric_values: Optional[list[ToolCallValidMetricValue]] = Field( default=None, - description="""Output only. TrajectoryInOrderMatch metric values.""", + description="""Output only. Tool call valid metric values.""", ) -class TrajectoryInOrderMatchResultsDict(TypedDict, total=False): - """Results for TrajectoryInOrderMatch metric.""" +class ToolCallValidResultsDict(TypedDict, total=False): + """Results for tool call valid metric.""" - trajectory_in_order_match_metric_values: Optional[ - list[TrajectoryInOrderMatchMetricValueDict] - ] - """Output only. TrajectoryInOrderMatch metric values.""" + tool_call_valid_metric_values: Optional[list[ToolCallValidMetricValueDict]] + """Output only. Tool call valid metric values.""" -TrajectoryInOrderMatchResultsOrDict = Union[ - TrajectoryInOrderMatchResults, TrajectoryInOrderMatchResultsDict -] +ToolCallValidResultsOrDict = Union[ToolCallValidResults, ToolCallValidResultsDict] -class TrajectoryPrecisionMetricValue(_common.BaseModel): - """TrajectoryPrecision metric value for an instance.""" +class ToolNameMatchMetricValue(_common.BaseModel): + """Tool name match metric value for an instance.""" score: Optional[float] = Field( - default=None, description="""Output only. TrajectoryPrecision score.""" + default=None, description="""Output only. Tool name match score.""" ) -class TrajectoryPrecisionMetricValueDict(TypedDict, total=False): - """TrajectoryPrecision metric value for an instance.""" +class ToolNameMatchMetricValueDict(TypedDict, total=False): + """Tool name match metric value for an instance.""" score: Optional[float] - """Output only. TrajectoryPrecision score.""" + """Output only. Tool name match score.""" -TrajectoryPrecisionMetricValueOrDict = Union[ - TrajectoryPrecisionMetricValue, TrajectoryPrecisionMetricValueDict +ToolNameMatchMetricValueOrDict = Union[ + ToolNameMatchMetricValue, ToolNameMatchMetricValueDict ] -class TrajectoryPrecisionResults(_common.BaseModel): - """Results for TrajectoryPrecision metric.""" +class ToolNameMatchResults(_common.BaseModel): + """Results for tool name match metric.""" - trajectory_precision_metric_values: Optional[ - list[TrajectoryPrecisionMetricValue] - ] = Field( + tool_name_match_metric_values: Optional[list[ToolNameMatchMetricValue]] = Field( default=None, - description="""Output only. TrajectoryPrecision metric values.""", + description="""Output only. Tool name match metric values.""", ) -class TrajectoryPrecisionResultsDict(TypedDict, total=False): - """Results for TrajectoryPrecision metric.""" +class ToolNameMatchResultsDict(TypedDict, total=False): + """Results for tool name match metric.""" - trajectory_precision_metric_values: Optional[ - list[TrajectoryPrecisionMetricValueDict] - ] - """Output only. TrajectoryPrecision metric values.""" + tool_name_match_metric_values: Optional[list[ToolNameMatchMetricValueDict]] + """Output only. 
Tool name match metric values.""" -TrajectoryPrecisionResultsOrDict = Union[ - TrajectoryPrecisionResults, TrajectoryPrecisionResultsDict -] +ToolNameMatchResultsOrDict = Union[ToolNameMatchResults, ToolNameMatchResultsDict] -class TrajectoryRecallMetricValue(_common.BaseModel): - """TrajectoryRecall metric value for an instance.""" +class ToolParameterKeyMatchMetricValue(_common.BaseModel): + """Tool parameter key match metric value for an instance.""" score: Optional[float] = Field( - default=None, description="""Output only. TrajectoryRecall score.""" + default=None, + description="""Output only. Tool parameter key match score.""", ) -class TrajectoryRecallMetricValueDict(TypedDict, total=False): - """TrajectoryRecall metric value for an instance.""" +class ToolParameterKeyMatchMetricValueDict(TypedDict, total=False): + """Tool parameter key match metric value for an instance.""" score: Optional[float] - """Output only. TrajectoryRecall score.""" + """Output only. Tool parameter key match score.""" -TrajectoryRecallMetricValueOrDict = Union[ - TrajectoryRecallMetricValue, TrajectoryRecallMetricValueDict +ToolParameterKeyMatchMetricValueOrDict = Union[ + ToolParameterKeyMatchMetricValue, ToolParameterKeyMatchMetricValueDict ] -class TrajectoryRecallResults(_common.BaseModel): - """Results for TrajectoryRecall metric.""" +class ToolParameterKeyMatchResults(_common.BaseModel): + """Results for tool parameter key match metric.""" - trajectory_recall_metric_values: Optional[ - list[TrajectoryRecallMetricValue] + tool_parameter_key_match_metric_values: Optional[ + list[ToolParameterKeyMatchMetricValue] ] = Field( default=None, - description="""Output only. TrajectoryRecall metric values.""", + description="""Output only. Tool parameter key match metric values.""", ) -class TrajectoryRecallResultsDict(TypedDict, total=False): - """Results for TrajectoryRecall metric.""" +class ToolParameterKeyMatchResultsDict(TypedDict, total=False): + """Results for tool parameter key match metric.""" - trajectory_recall_metric_values: Optional[list[TrajectoryRecallMetricValueDict]] - """Output only. TrajectoryRecall metric values.""" + tool_parameter_key_match_metric_values: Optional[ + list[ToolParameterKeyMatchMetricValueDict] + ] + """Output only. Tool parameter key match metric values.""" -TrajectoryRecallResultsOrDict = Union[ - TrajectoryRecallResults, TrajectoryRecallResultsDict +ToolParameterKeyMatchResultsOrDict = Union[ + ToolParameterKeyMatchResults, ToolParameterKeyMatchResultsDict ] -class TrajectorySingleToolUseMetricValue(_common.BaseModel): - """TrajectorySingleToolUse metric value for an instance.""" +class ToolParameterKVMatchMetricValue(_common.BaseModel): + """Tool parameter key value match metric value for an instance.""" score: Optional[float] = Field( default=None, - description="""Output only. TrajectorySingleToolUse score.""", + description="""Output only. Tool parameter key value match score.""", ) -class TrajectorySingleToolUseMetricValueDict(TypedDict, total=False): - """TrajectorySingleToolUse metric value for an instance.""" +class ToolParameterKVMatchMetricValueDict(TypedDict, total=False): + """Tool parameter key value match metric value for an instance.""" score: Optional[float] - """Output only. TrajectorySingleToolUse score.""" + """Output only. 
Tool parameter key value match score.""" -TrajectorySingleToolUseMetricValueOrDict = Union[ - TrajectorySingleToolUseMetricValue, TrajectorySingleToolUseMetricValueDict +ToolParameterKVMatchMetricValueOrDict = Union[ + ToolParameterKVMatchMetricValue, ToolParameterKVMatchMetricValueDict ] -class TrajectorySingleToolUseResults(_common.BaseModel): - """Results for TrajectorySingleToolUse metric.""" +class ToolParameterKVMatchResults(_common.BaseModel): + """Results for tool parameter key value match metric.""" - trajectory_single_tool_use_metric_values: Optional[ - list[TrajectorySingleToolUseMetricValue] + tool_parameter_kv_match_metric_values: Optional[ + list[ToolParameterKVMatchMetricValue] ] = Field( default=None, - description="""Output only. TrajectorySingleToolUse metric values.""", + description="""Output only. Tool parameter key value match metric values.""", ) -class TrajectorySingleToolUseResultsDict(TypedDict, total=False): - """Results for TrajectorySingleToolUse metric.""" +class ToolParameterKVMatchResultsDict(TypedDict, total=False): + """Results for tool parameter key value match metric.""" - trajectory_single_tool_use_metric_values: Optional[ - list[TrajectorySingleToolUseMetricValueDict] + tool_parameter_kv_match_metric_values: Optional[ + list[ToolParameterKVMatchMetricValueDict] ] - """Output only. TrajectorySingleToolUse metric values.""" + """Output only. Tool parameter key value match metric values.""" -TrajectorySingleToolUseResultsOrDict = Union[ - TrajectorySingleToolUseResults, TrajectorySingleToolUseResultsDict +ToolParameterKVMatchResultsOrDict = Union[ + ToolParameterKVMatchResults, ToolParameterKVMatchResultsDict ] class EvaluateInstancesResponse(_common.BaseModel): """Result of evaluating an LLM metric.""" + rubric_based_metric_result: Optional[RubricBasedMetricResult] = Field( + default=None, description="""Result for rubric based metric.""" + ) bleu_results: Optional[BleuResults] = Field( default=None, description="""Results for bleu metric.""" ) @@ -1989,16 +2035,6 @@ class EvaluateInstancesResponse(_common.BaseModel): rouge_results: Optional[RougeResults] = Field( default=None, description="""Results for rouge metric.""" ) - rubric_based_instruction_following_result: Optional[ - RubricBasedInstructionFollowingResult - ] = Field( - default=None, - description="""Result for rubric based instruction following metric.""", - ) - summarization_verbosity_result: Optional[SummarizationVerbosityResult] = Field( - default=None, - description="""Result for summarization verbosity metric.""", - ) tool_call_valid_results: Optional[ToolCallValidResults] = Field( default=None, description="""Tool call metrics. 
Results for tool call valid metric.""", @@ -2014,37 +2050,14 @@ class EvaluateInstancesResponse(_common.BaseModel): default=None, description="""Results for tool parameter key value match metric.""", ) - trajectory_any_order_match_results: Optional[ - TrajectoryAnyOrderMatchResults - ] = Field( - default=None, - description="""Result for trajectory any order match metric.""", - ) - trajectory_exact_match_results: Optional[TrajectoryExactMatchResults] = Field( - default=None, - description="""Result for trajectory exact match metric.""", - ) - trajectory_in_order_match_results: Optional[TrajectoryInOrderMatchResults] = Field( - default=None, - description="""Result for trajectory in order match metric.""", - ) - trajectory_precision_results: Optional[TrajectoryPrecisionResults] = Field( - default=None, description="""Result for trajectory precision metric.""" - ) - trajectory_recall_results: Optional[TrajectoryRecallResults] = Field( - default=None, description="""Results for trajectory recall metric.""" - ) - trajectory_single_tool_use_results: Optional[ - TrajectorySingleToolUseResults - ] = Field( - default=None, - description="""Results for trajectory single tool use metric.""", - ) class EvaluateInstancesResponseDict(TypedDict, total=False): """Result of evaluating an LLM metric.""" + rubric_based_metric_result: Optional[RubricBasedMetricResultDict] + """Result for rubric based metric.""" + bleu_results: Optional[BleuResultsDict] """Results for bleu metric.""" @@ -2066,14 +2079,6 @@ class EvaluateInstancesResponseDict(TypedDict, total=False): rouge_results: Optional[RougeResultsDict] """Results for rouge metric.""" - rubric_based_instruction_following_result: Optional[ - RubricBasedInstructionFollowingResultDict - ] - """Result for rubric based instruction following metric.""" - - summarization_verbosity_result: Optional[SummarizationVerbosityResultDict] - """Result for summarization verbosity metric.""" - tool_call_valid_results: Optional[ToolCallValidResultsDict] """Tool call metrics. Results for tool call valid metric.""" @@ -2086,80 +2091,12 @@ class EvaluateInstancesResponseDict(TypedDict, total=False): tool_parameter_kv_match_results: Optional[ToolParameterKVMatchResultsDict] """Results for tool parameter key value match metric.""" - trajectory_any_order_match_results: Optional[TrajectoryAnyOrderMatchResultsDict] - """Result for trajectory any order match metric.""" - - trajectory_exact_match_results: Optional[TrajectoryExactMatchResultsDict] - """Result for trajectory exact match metric.""" - - trajectory_in_order_match_results: Optional[TrajectoryInOrderMatchResultsDict] - """Result for trajectory in order match metric.""" - - trajectory_precision_results: Optional[TrajectoryPrecisionResultsDict] - """Result for trajectory precision metric.""" - - trajectory_recall_results: Optional[TrajectoryRecallResultsDict] - """Results for trajectory recall metric.""" - - trajectory_single_tool_use_results: Optional[TrajectorySingleToolUseResultsDict] - """Results for trajectory single tool use metric.""" - EvaluateInstancesResponseOrDict = Union[ EvaluateInstancesResponse, EvaluateInstancesResponseDict ] -class RubricGenerationSpec(_common.BaseModel): - """Spec for generating rubrics.""" - - prompt_template: Optional[str] = Field( - default=None, - description="""Template for the prompt used to generate rubrics. 
- The details should be updated based on the most-recent recipe requirements.""", - ) - generator_model_config: Optional[AutoraterConfig] = Field( - default=None, - description="""Configuration for the model used in rubric generation. - Configs including sampling count and base model can be specified here. - Flipping is not supported for rubric generation.""", - ) - rubric_content_type: Optional[RubricContentType] = Field( - default=None, - description="""The type of rubric content to be generated.""", - ) - rubric_type_ontology: Optional[list[str]] = Field( - default=None, - description="""An optional, pre-defined list of allowed types for generated rubrics. - If this field is provided, it implies `include_rubric_type` should be true, - and the generated rubric types should be chosen from this ontology.""", - ) - - -class RubricGenerationSpecDict(TypedDict, total=False): - """Spec for generating rubrics.""" - - prompt_template: Optional[str] - """Template for the prompt used to generate rubrics. - The details should be updated based on the most-recent recipe requirements.""" - - generator_model_config: Optional[AutoraterConfigDict] - """Configuration for the model used in rubric generation. - Configs including sampling count and base model can be specified here. - Flipping is not supported for rubric generation.""" - - rubric_content_type: Optional[RubricContentType] - """The type of rubric content to be generated.""" - - rubric_type_ontology: Optional[list[str]] - """An optional, pre-defined list of allowed types for generated rubrics. - If this field is provided, it implies `include_rubric_type` should be true, - and the generated rubric types should be chosen from this ontology.""" - - -RubricGenerationSpecOrDict = Union[RubricGenerationSpec, RubricGenerationSpecDict] - - class RubricGenerationConfig(_common.BaseModel): """Config for generating rubrics.""" @@ -2210,102 +2147,6 @@ class _GenerateInstanceRubricsRequestDict(TypedDict, total=False): ] -class RubricContentProperty(_common.BaseModel): - """Defines criteria based on a specific property.""" - - description: Optional[str] = Field( - default=None, - description="""Description of the property being evaluated. - Example: "The model's response is grammatically correct." """, - ) - - -class RubricContentPropertyDict(TypedDict, total=False): - """Defines criteria based on a specific property.""" - - description: Optional[str] - """Description of the property being evaluated. - Example: "The model's response is grammatically correct." """ - - -RubricContentPropertyOrDict = Union[RubricContentProperty, RubricContentPropertyDict] - - -class RubricContent(_common.BaseModel): - """Content of the rubric, defining the testable criteria.""" - - property: Optional[RubricContentProperty] = Field( - default=None, - description="""Evaluation criteria based on a specific property.""", - ) - - -class RubricContentDict(TypedDict, total=False): - """Content of the rubric, defining the testable criteria.""" - - property: Optional[RubricContentPropertyDict] - """Evaluation criteria based on a specific property.""" - - -RubricContentOrDict = Union[RubricContent, RubricContentDict] - - -class Rubric(_common.BaseModel): - """Message representing a single testable criterion for evaluation. - - One input prompt could have multiple rubrics. - """ - - rubric_id: Optional[str] = Field( - default=None, - description="""Required. Unique identifier for the rubric. 
- This ID is used to refer to this rubric, e.g., in RubricVerdict.""", - ) - content: Optional[RubricContent] = Field( - default=None, - description="""Required. The actual testable criteria for the rubric.""", - ) - type: Optional[str] = Field( - default=None, - description="""Optional. A type designator for the rubric, which can inform how it's - evaluated or interpreted by systems or users. - It's recommended to use consistent, well-defined, upper snake_case strings. - Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", - "INSTRUCTION_ADHERENCE".""", - ) - importance: Optional[Importance] = Field( - default=None, - description="""Optional. The relative importance of this rubric.""", - ) - - -class RubricDict(TypedDict, total=False): - """Message representing a single testable criterion for evaluation. - - One input prompt could have multiple rubrics. - """ - - rubric_id: Optional[str] - """Required. Unique identifier for the rubric. - This ID is used to refer to this rubric, e.g., in RubricVerdict.""" - - content: Optional[RubricContentDict] - """Required. The actual testable criteria for the rubric.""" - - type: Optional[str] - """Optional. A type designator for the rubric, which can inform how it's - evaluated or interpreted by systems or users. - It's recommended to use consistent, well-defined, upper snake_case strings. - Examples: "SUMMARIZATION_QUALITY", "SAFETY_HARMFUL_CONTENT", - "INSTRUCTION_ADHERENCE".""" - - importance: Optional[Importance] - """Optional. The relative importance of this rubric.""" - - -RubricOrDict = Union[Rubric, RubricDict] - - class GenerateInstanceRubricsResponse(_common.BaseModel): """Response for generating rubrics.""" @@ -6463,13 +6304,13 @@ class EvalCaseMetricResult(_common.BaseModel): explanation: Optional[str] = Field( default=None, description="""Explanation of the metric.""" ) + rubric_verdicts: Optional[list[RubricVerdict]] = Field( + default=None, + description="""The details of all the rubrics and their verdicts for rubric-based metrics.""", + ) raw_output: Optional[list[str]] = Field( default=None, description="""Raw output of the metric.""" ) - rubrics: Optional[list[str]] = Field( - default=None, - description="""A list of rubrics used to evaluate the example for rubric-based metrics.""", - ) error_message: Optional[str] = Field( default=None, description="""Error message for the metric.""" ) @@ -6487,12 +6328,12 @@ class EvalCaseMetricResultDict(TypedDict, total=False): explanation: Optional[str] """Explanation of the metric.""" + rubric_verdicts: Optional[list[RubricVerdictDict]] + """The details of all the rubrics and their verdicts for rubric-based metrics.""" + raw_output: Optional[list[str]] """Raw output of the metric.""" - rubrics: Optional[list[str]] - """A list of rubrics used to evaluate the example for rubric-based metrics.""" - error_message: Optional[str] """Error message for the metric.""" @@ -6855,6 +6696,43 @@ class EvaluateDatasetOperationDict(TypedDict, total=False): ] +class RubricGroup(_common.BaseModel): + """A group of rubrics, used for grouping rubrics based on a metric or a version.""" + + group_id: Optional[str] = Field( + default=None, description="""Unique identifier for the group.""" + ) + display_name: Optional[str] = Field( + default=None, + description="""Human-readable name for the group. This should be unique + within a given context if used for display or selection. 
+ Example: "Instruction Following V1", "Content Quality - Summarization + Task".""", + ) + rubrics: Optional[list[Rubric]] = Field( + default=None, description="""Rubrics that are part of this group.""" + ) + + +class RubricGroupDict(TypedDict, total=False): + """A group of rubrics, used for grouping rubrics based on a metric or a version.""" + + group_id: Optional[str] + """Unique identifier for the group.""" + + display_name: Optional[str] + """Human-readable name for the group. This should be unique + within a given context if used for display or selection. + Example: "Instruction Following V1", "Content Quality - Summarization + Task".""" + + rubrics: Optional[list[RubricDict]] + """Rubrics that are part of this group.""" + + +RubricGroupOrDict = Union[RubricGroup, RubricGroupDict] + + class AgentEngine(_common.BaseModel): """An agent engine instance.""" From 8321826fe3c18b8b938861fe0930aa7ec4e97fa3 Mon Sep 17 00:00:00 2001 From: Yeesian Ng Date: Tue, 15 Jul 2025 08:33:16 -0700 Subject: [PATCH 10/13] feat: GenAI SDK client - Add support for context specs when creating agent engine instances PiperOrigin-RevId: 783344749 --- .../genai/replays/test_create_agent_engine.py | 28 +++ .../unit/vertexai/genai/test_agent_engines.py | 1 + vertexai/_genai/agent_engines.py | 46 +++++ vertexai/_genai/types.py | 169 ++++++++++-------- 4 files changed, 170 insertions(+), 74 deletions(-) diff --git a/tests/unit/vertexai/genai/replays/test_create_agent_engine.py b/tests/unit/vertexai/genai/replays/test_create_agent_engine.py index f5320ae5f0..074ee79cf9 100644 --- a/tests/unit/vertexai/genai/replays/test_create_agent_engine.py +++ b/tests/unit/vertexai/genai/replays/test_create_agent_engine.py @@ -38,6 +38,34 @@ def test_create_config_lightweight(client): } +def test_create_with_context_spec(client): + project = "test-project" + location = "us-central1" + parent = f"projects/{project}/locations/{location}" + generation_model = f"{parent}/publishers/google/models/gemini-2.0-flash-001" + embedding_model = f"{parent}/publishers/google/models/text-embedding-005" + + agent_engine = client.agent_engines.create( + config={ + "context_spec": { + "memory_bank_config": { + "generation_config": {"model": generation_model}, + "similarity_search_config": { + "embedding_model": embedding_model, + }, + }, + }, + "http_options": {"api_version": "v1beta1"}, + }, + ) + agent_engine = client.agent_engines.get(name=agent_engine.api_resource.name) + memory_bank_config = agent_engine.api_resource.context_spec.memory_bank_config + assert memory_bank_config.generation_config.model == generation_model + assert ( + memory_bank_config.similarity_search_config.embedding_model == embedding_model + ) + + pytestmark = pytest_helper.setup( file=__file__, globals_for_file=globals(), diff --git a/tests/unit/vertexai/genai/test_agent_engines.py b/tests/unit/vertexai/genai/test_agent_engines.py index 39b9447980..6a562b94be 100644 --- a/tests/unit/vertexai/genai/test_agent_engines.py +++ b/tests/unit/vertexai/genai/test_agent_engines.py @@ -1110,6 +1110,7 @@ def test_create_agent_engine_with_env_vars_dict( gcs_dir_name=None, extra_packages=[_TEST_AGENT_ENGINE_EXTRA_PACKAGE_PATH], env_vars=_TEST_AGENT_ENGINE_ENV_VARS_INPUT, + context_spec=None, ) request_mock.assert_called_with( "post", diff --git a/vertexai/_genai/agent_engines.py b/vertexai/_genai/agent_engines.py index 5f41e817e9..58bd9f25a4 100644 --- a/vertexai/_genai/agent_engines.py +++ b/vertexai/_genai/agent_engines.py @@ -63,6 +63,21 @@ def _ReasoningEngineSpec_to_vertex( return 
to_object +def _ReasoningEngineContextSpec_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["memory_bank_config"]) is not None: + setv( + to_object, + ["memoryBankConfig"], + getv(from_object, ["memory_bank_config"]), + ) + + return to_object + + def _CreateAgentEngineConfig_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -82,6 +97,15 @@ def _CreateAgentEngineConfig_to_vertex( _ReasoningEngineSpec_to_vertex(getv(from_object, ["spec"]), to_object), ) + if getv(from_object, ["context_spec"]) is not None: + setv( + parent_object, + ["contextSpec"], + _ReasoningEngineContextSpec_to_vertex( + getv(from_object, ["context_spec"]), to_object + ), + ) + return to_object @@ -550,6 +574,15 @@ def _UpdateAgentEngineConfig_to_vertex( _ReasoningEngineSpec_to_vertex(getv(from_object, ["spec"]), to_object), ) + if getv(from_object, ["context_spec"]) is not None: + setv( + parent_object, + ["contextSpec"], + _ReasoningEngineContextSpec_to_vertex( + getv(from_object, ["context_spec"]), to_object + ), + ) + if getv(from_object, ["update_mask"]) is not None: setv( parent_object, @@ -1976,6 +2009,10 @@ def create( "config must be a dict or AgentEngineConfig, but got" f" {type(config)}." ) + context_spec = config.context_spec + if context_spec is not None: + # Conversion to a dict for _create_config + context_spec = context_spec.model_dump() api_config = self._create_config( mode="create", agent_engine=agent_engine, @@ -1986,6 +2023,7 @@ def create( gcs_dir_name=config.gcs_dir_name, extra_packages=config.extra_packages, env_vars=config.env_vars, + context_spec=context_spec, ) operation = self._create(config=api_config) # TODO: Use a more specific link. @@ -2029,6 +2067,7 @@ def _create_config( gcs_dir_name: Optional[str] = None, extra_packages: Optional[Sequence[str]] = None, env_vars: Optional[dict[str, Union[str, Any]]] = None, + context_spec: Optional[dict[str, Any]] = None, ): import sys from vertexai.agent_engines import _agent_engines @@ -2049,6 +2088,8 @@ def _create_config( if description is not None: update_masks.append("description") config["description"] = description + if context_spec is not None: + config["context_spec"] = context_spec if agent_engine is not None: sys_version = f"{sys.version_info.major}.{sys.version_info.minor}" gcs_dir_name = gcs_dir_name or _agent_engines._DEFAULT_GCS_DIR_NAME @@ -2307,6 +2348,10 @@ def update( "config must be a dict or AgentEngineConfig, but got" f" {type(config)}." 
) + context_spec = config.context_spec + if context_spec is not None: + # Conversion to a dict for _create_config + context_spec = context_spec.model_dump() api_config = self._create_config( mode="update", agent_engine=agent_engine, @@ -2317,6 +2362,7 @@ def update( gcs_dir_name=config.gcs_dir_name, extra_packages=config.extra_packages, env_vars=config.env_vars, + context_spec=context_spec, ) operation = self._update(name=name, config=api_config) logger.info( diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py index b2631b20c3..3e6055515f 100644 --- a/vertexai/_genai/types.py +++ b/vertexai/_genai/types.py @@ -3127,7 +3127,7 @@ class ReasoningEngineSpecPackageSpecDict(TypedDict, total=False): class ReasoningEngineSpec(_common.BaseModel): - """The specification of a Reasoning Engine.""" + """The specification of an agent engine.""" agent_framework: Optional[str] = Field( default=None, @@ -3148,7 +3148,7 @@ class ReasoningEngineSpec(_common.BaseModel): class ReasoningEngineSpecDict(TypedDict, total=False): - """The specification of a Reasoning Engine.""" + """The specification of an agent engine.""" agent_framework: Optional[str] """Optional. The OSS agent framework used to develop the agent. Currently supported values: "google-adk", "langchain", "langgraph", "ag2", "llama-index", "custom".""" @@ -3166,72 +3166,6 @@ class ReasoningEngineSpecDict(TypedDict, total=False): ReasoningEngineSpecOrDict = Union[ReasoningEngineSpec, ReasoningEngineSpecDict] -class CreateAgentEngineConfig(_common.BaseModel): - """Config for create agent engine.""" - - http_options: Optional[HttpOptions] = Field( - default=None, description="""Used to override HTTP request options.""" - ) - display_name: Optional[str] = Field( - default=None, - description="""The user-defined name of the Agent Engine. - - The display name can be up to 128 characters long and can comprise any - UTF-8 characters. - """, - ) - description: Optional[str] = Field( - default=None, description="""The description of the Agent Engine.""" - ) - spec: Optional[ReasoningEngineSpec] = Field( - default=None, - description="""Optional. Configurations of the ReasoningEngine.""", - ) - - -class CreateAgentEngineConfigDict(TypedDict, total=False): - """Config for create agent engine.""" - - http_options: Optional[HttpOptionsDict] - """Used to override HTTP request options.""" - - display_name: Optional[str] - """The user-defined name of the Agent Engine. - - The display name can be up to 128 characters long and can comprise any - UTF-8 characters. - """ - - description: Optional[str] - """The description of the Agent Engine.""" - - spec: Optional[ReasoningEngineSpecDict] - """Optional. 
Configurations of the ReasoningEngine.""" - - -CreateAgentEngineConfigOrDict = Union[ - CreateAgentEngineConfig, CreateAgentEngineConfigDict -] - - -class _CreateAgentEngineRequestParameters(_common.BaseModel): - """Parameters for creating agent engines.""" - - config: Optional[CreateAgentEngineConfig] = Field(default=None, description="""""") - - -class _CreateAgentEngineRequestParametersDict(TypedDict, total=False): - """Parameters for creating agent engines.""" - - config: Optional[CreateAgentEngineConfigDict] - """""" - - -_CreateAgentEngineRequestParametersOrDict = Union[ - _CreateAgentEngineRequestParameters, _CreateAgentEngineRequestParametersDict -] - - class ReasoningEngineContextSpecMemoryBankConfigGenerationConfig(_common.BaseModel): """Configuration for how to generate memories.""" @@ -3320,7 +3254,7 @@ class ReasoningEngineContextSpecMemoryBankConfigDict(TypedDict, total=False): class ReasoningEngineContextSpec(_common.BaseModel): - """Configuration for how Agent Engine sub-resources should manage context.""" + """The configuration for agent engine sub-resources to manage context.""" memory_bank_config: Optional[ReasoningEngineContextSpecMemoryBankConfig] = Field( default=None, @@ -3329,7 +3263,7 @@ class ReasoningEngineContextSpec(_common.BaseModel): class ReasoningEngineContextSpecDict(TypedDict, total=False): - """Configuration for how Agent Engine sub-resources should manage context.""" + """The configuration for agent engine sub-resources to manage context.""" memory_bank_config: Optional[ReasoningEngineContextSpecMemoryBankConfigDict] """Optional. Specification for a Memory Bank, which manages memories for the Agent Engine.""" @@ -3340,6 +3274,79 @@ class ReasoningEngineContextSpecDict(TypedDict, total=False): ] +class CreateAgentEngineConfig(_common.BaseModel): + """Config for create agent engine.""" + + http_options: Optional[HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + display_name: Optional[str] = Field( + default=None, + description="""The user-defined name of the Agent Engine. + + The display name can be up to 128 characters long and can comprise any + UTF-8 characters. + """, + ) + description: Optional[str] = Field( + default=None, description="""The description of the Agent Engine.""" + ) + spec: Optional[ReasoningEngineSpec] = Field( + default=None, + description="""Optional. Configurations of the Agent Engine.""", + ) + context_spec: Optional[ReasoningEngineContextSpec] = Field( + default=None, + description="""Optional. The context spec to be used for the Agent Engine.""", + ) + + +class CreateAgentEngineConfigDict(TypedDict, total=False): + """Config for create agent engine.""" + + http_options: Optional[HttpOptionsDict] + """Used to override HTTP request options.""" + + display_name: Optional[str] + """The user-defined name of the Agent Engine. + + The display name can be up to 128 characters long and can comprise any + UTF-8 characters. + """ + + description: Optional[str] + """The description of the Agent Engine.""" + + spec: Optional[ReasoningEngineSpecDict] + """Optional. Configurations of the Agent Engine.""" + + context_spec: Optional[ReasoningEngineContextSpecDict] + """Optional. 
The context spec to be used for the Agent Engine.""" + + +CreateAgentEngineConfigOrDict = Union[ + CreateAgentEngineConfig, CreateAgentEngineConfigDict +] + + +class _CreateAgentEngineRequestParameters(_common.BaseModel): + """Parameters for creating agent engines.""" + + config: Optional[CreateAgentEngineConfig] = Field(default=None, description="""""") + + +class _CreateAgentEngineRequestParametersDict(TypedDict, total=False): + """Parameters for creating agent engines.""" + + config: Optional[CreateAgentEngineConfigDict] + """""" + + +_CreateAgentEngineRequestParametersOrDict = Union[ + _CreateAgentEngineRequestParameters, _CreateAgentEngineRequestParametersDict +] + + class ReasoningEngine(_common.BaseModel): """An agent engine.""" @@ -4374,7 +4381,7 @@ class _GenerateAgentEngineMemoriesRequestParametersDict(TypedDict, total=False): class GenerateMemoriesResponseGeneratedMemory(_common.BaseModel): - """A memmory that was generated.""" + """A memory that was generated.""" memory: Optional[Memory] = Field( default=None, description="""The generated memory.""" @@ -4385,7 +4392,7 @@ class GenerateMemoriesResponseGeneratedMemory(_common.BaseModel): class GenerateMemoriesResponseGeneratedMemoryDict(TypedDict, total=False): - """A memmory that was generated.""" + """A memory that was generated.""" memory: Optional[MemoryDict] """The generated memory.""" @@ -5125,7 +5132,11 @@ class UpdateAgentEngineConfig(_common.BaseModel): ) spec: Optional[ReasoningEngineSpec] = Field( default=None, - description="""Optional. Configurations of the ReasoningEngine.""", + description="""Optional. Configurations of the Agent Engine.""", + ) + context_spec: Optional[ReasoningEngineContextSpec] = Field( + default=None, + description="""Optional. The context spec to be used for the Agent Engine.""", ) update_mask: Optional[str] = Field( default=None, @@ -5151,7 +5162,10 @@ class UpdateAgentEngineConfigDict(TypedDict, total=False): """The description of the Agent Engine.""" spec: Optional[ReasoningEngineSpecDict] - """Optional. Configurations of the ReasoningEngine.""" + """Optional. Configurations of the Agent Engine.""" + + context_spec: Optional[ReasoningEngineContextSpecDict] + """Optional. The context spec to be used for the Agent Engine.""" update_mask: Optional[str] """The update mask to apply. 
For the `FieldMask` definition, see @@ -6849,6 +6863,10 @@ class AgentEngineConfig(_common.BaseModel): If it is a dictionary, the keys are the environment variable names, and the values are the corresponding values.""", ) + context_spec: Optional[ReasoningEngineContextSpec] = Field( + default=None, + description="""The context spec to be used for the Agent Engine.""", + ) class AgentEngineConfigDict(TypedDict, total=False): @@ -6892,5 +6910,8 @@ class AgentEngineConfigDict(TypedDict, total=False): If it is a dictionary, the keys are the environment variable names, and the values are the corresponding values.""" + context_spec: Optional[ReasoningEngineContextSpecDict] + """The context spec to be used for the Agent Engine.""" + AgentEngineConfigOrDict = Union[AgentEngineConfig, AgentEngineConfigDict] From c49aa4065e4407968af45a2bd41f60854f19600f Mon Sep 17 00:00:00 2001 From: Amy Wu Date: Tue, 15 Jul 2025 12:22:09 -0700 Subject: [PATCH 11/13] chore: bump setuptools version lower bound PiperOrigin-RevId: 783428198 --- setup.py | 2 -- testing/constraints-ray-2.9.3.txt | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8bf7544224..d81766cf9d 100644 --- a/setup.py +++ b/setup.py @@ -110,8 +110,6 @@ " 2.36.*, !=2.37.*, !=2.38.*, !=2.39.*, !=2.40.*, !=2.41.*;" " python_version<'3.11'" ), - # To avoid ImportError: cannot import name 'packaging' from 'pkg_resources' - "setuptools < 70.0.0", # Ray Data v2.4 in Python 3.11 is broken, but got fixed in Ray v2.5. "ray[default] >= 2.5, <= 2.42.0; python_version=='3.11'", "google-cloud-bigquery-storage", diff --git a/testing/constraints-ray-2.9.3.txt b/testing/constraints-ray-2.9.3.txt index 968d558150..c4c1ea816c 100644 --- a/testing/constraints-ray-2.9.3.txt +++ b/testing/constraints-ray-2.9.3.txt @@ -1,5 +1,6 @@ ray==2.9.3 # Below constraints are inherited from constraints-3.10.txt +setuptools<70.0.0 google-api-core proto-plus==1.22.3 protobuf From 36bfda246eeb2b2a171cec9fb2602f4802601b7d Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Tue, 15 Jul 2025 14:27:03 -0700 Subject: [PATCH 12/13] feat: GenAI SDK client(evals) - Add support for rubric-based metrics, and rubric customization eval workflow PiperOrigin-RevId: 783472488 --- .../replays/test_public_generate_rubrics.py | 181 ++++++++++ tests/unit/vertexai/genai/test_evals.py | 124 +++---- vertexai/_genai/_evals_metric_handlers.py | 337 +++++++++++------- vertexai/_genai/evals.py | 114 ++++++ vertexai/_genai/types.py | 66 +++- 5 files changed, 605 insertions(+), 217 deletions(-) create mode 100644 tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py diff --git a/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py b/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py new file mode 100644 index 0000000000..a5e5826745 --- /dev/null +++ b/tests/unit/vertexai/genai/replays/test_public_generate_rubrics.py @@ -0,0 +1,181 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + + +from tests.unit.vertexai.genai.replays import pytest_helper +from vertexai._genai import types +import pandas as pd + +_TEST_RUBRIC_GENERATION_PROMPT = """SPECIAL INSTRUCTION: think silently. Silent thinking token budget: 16384. + +You are a teacher who is responsible for scoring a student\'s response to a prompt. In order to score that response, you must write down a rubric for each prompt. That rubric states what properties the response must have in order to be a valid response to the prompt. Properties are weighted by importance via the "importance" field. + +Rubric requirements: +- Properties either exist or don\'t exist. +- Properties can be either implicit in the prompt or made explicit by the prompt. +- Make sure to always include the correct expected human language as one of the properties. If the prompt asks for code, the programming language should be covered by a separate property. +- The correct expected language may be explicit in the text of the prompt but is usually simply implicit in the prompt itself. +- Be as comprehensive as possible with the list of properties in the rubric. +- All properties in the rubric must be in English, regardless of the language of the prompt. +- Rubric properties should not specify correct answers in their descriptions, e.g. to math and factoid questions if the prompt calls for such an answer. Rather, it should check that the response contains an answer and optional supporting evidence if relevant, and assume some other process will later validate correctness. A rubric property should however call out any false premises present in the prompt. + +About importance: +- Most properties will be of medium importance by default. +- Properties of high importance are critical to be fulfilled in a good response. +- Properties of low importance are considered optional or supplementary nice-to-haves. + +You will see prompts in many different languages, not just English. For each prompt you see, you will write down this rubric in JSON format. + +IMPORTANT: Never respond to the prompt given. Only write a rubric. + +Example: +What is the tallest building in the world? + +```json +{ + "criteria":[ + { + "rubric_id": "00001", + "property": "The response is in English.", + "type": "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE", + "importance": "high" + }, + { + "rubric_id": "00002", + "property": "Contains the name of the tallest building in the world.", + "type": "QA_ANSWER:FACTOID", + "importance": "high" + }, + { + "rubric_id": "00003", + "property": "Contains the exact height of the tallest building.", + "type": "QA_SUPPORTING_EVIDENCE:HEIGHT", + "importance": "low" + }, + { + "rubric_id": "00004", + "property": "Contains the location of the tallest building.", + "type": "QA_SUPPORTING_EVIDENCE:LOCATION", + "importance": "low" + }, + ... + ] +} +``` + +Write me a letter to my HOA asking them to reconsider the fees they are asking me to pay because I haven\'t mowed my lawn on time. I have been very busy at work. 
+```json +{ + "criteria": [ + { + "rubric_id": "00001", + "property": "The response is in English.", + "type": "LANGUAGE:PRIMARY_RESPONSE_LANGUAGE", + "importance": "high" + }, + { + "rubric_id": "00002", + "property": "The response is formatted as a letter.", + "type": "FORMAT_REQUIREMENT:FORMAL_LETTER", + "importance": "medium" + }, + { + "rubric_id": "00003", + "property": "The letter is addressed to the Homeowners Association (HOA).", + "type": "CONTENT_REQUIREMENT:ADDRESSEE", + "importance": "medium" + }, + { + "rubric_id": "00004", + "property": "The letter explains that the sender has not mowed their lawn on time.", + "type": "CONTENT_REQUIREMENT:BACKGROUND_CONTEXT:TARDINESS", + "importance": "medium" + }, + { + "rubric_id": "00005", + "property": "The letter provides a reason for not mowing the lawn, specifically being busy at work.", + "type": "CONTENT_REQUIREMENT:EXPLANATION:EXCUSE:BUSY", + "importance": "medium" + }, + { + "rubric_id": "00006", + "property": "The letter discusses that the sender has been in compliance until now.", + "type": "OPTIONAL_CONTENT:SUPPORTING_EVIDENCE:COMPLIANCE", + "importance": "low" + }, + { + "rubric_id": "00007", + "property": "The letter requests that the HOA reconsider the fees associated with not mowing the lawn on time.", + "type": "CONTENT_REQUIREMENT:REQUEST:FEE_WAIVER", + "importance": "high" + }, + { + "rubric_id": "00008", + "property": "The letter maintains a polite and respectful tone.", + "type": "CONTENT_REQUIREMENT:FORMALITY:FORMAL", + "importance": "high" + }, + { + "rubric_id": "00009", + "property": "The letter includes a closing (e.g., \'Sincerely\') and the sender\'s name.", + "type": "CONTENT_REQUIREMENT:SIGNATURE", + "importance": "medium" + } + ] +} +``` + +Now write a rubric for the following user prompt. Remember to write only the rubric, NOT response to the prompt. 
+ +User prompt: +{prompt}""" + + +def test_public_method_generate_rubrics(client): + """Tests the public generate_rubrics method.""" + prompts_df = pd.DataFrame( + { + "prompt": [ + "Explain the theory of relativity in one sentence.", + "Write a short poem about a cat.", + ] + } + ) + data_with_rubrics = client.evals.generate_rubrics( + src=prompts_df, + prompt_template=_TEST_RUBRIC_GENERATION_PROMPT, + rubric_group_name="text_quality_rubrics", + ) + + # Assertions focus on the returned DataFrame + assert isinstance(data_with_rubrics, pd.DataFrame) + assert "rubric_groups" in data_with_rubrics.columns + assert len(data_with_rubrics) == 2 + + # Check the structure of the first row's rubric_groups + first_rubric_group = data_with_rubrics["rubric_groups"][0] + assert isinstance(first_rubric_group, dict) + assert "text_quality_rubrics" in first_rubric_group + assert isinstance(first_rubric_group["text_quality_rubrics"], list) + assert first_rubric_group["text_quality_rubrics"] + assert isinstance(first_rubric_group["text_quality_rubrics"][0], types.Rubric) + + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), + test_method="evals.generate_rubrics", +) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 8e0559744e..67bb475dd9 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -17,9 +17,9 @@ import os import statistics from unittest import mock -import google.auth.credentials import warnings +import google.auth.credentials from google.cloud import aiplatform import vertexai from google.cloud.aiplatform import initializer as aiplatform_initializer @@ -45,6 +45,16 @@ pytestmark = pytest.mark.usefixtures("google_auth_mock") +def _create_content_dump(text: str) -> dict[str, list[genai_types.Content]]: + return { + "contents": [ + genai_types.Content(parts=[genai_types.Part(text=text)]).model_dump( + mode="json", exclude_none=True + ) + ] + } + + @pytest.fixture def mock_api_client_fixture(): mock_client = mock.Mock(spec=client.Client) @@ -2709,15 +2719,11 @@ def setup_method(self): def test_build_request_payload_basic_filtering_and_fields(self): metric = vertexai_genai_types.LLMMetric( name="test_quality", - prompt_template=( - "Eval: {prompt} with {response}. Context: " - "{custom_context}. Ref: {reference}" - ), + prompt_template="Eval: {prompt} with {response}. Context: {custom_context}. 
Ref: {reference}", ) handler = _evals_metric_handlers.LLMMetricHandler( module=self.mock_evals_module, metric=metric ) - eval_case = vertexai_genai_types.EvalCase( prompt=genai_types.Content( parts=[genai_types.Part(text="User prompt text")] @@ -2734,52 +2740,35 @@ def test_build_request_payload_basic_filtering_and_fields(self): parts=[genai_types.Part(text="Ground truth text")] ) ), - custom_context="Custom context value.", # pylint: disable=unexpected-keyword-arg - extra_field_not_in_template="This should be excluded.", # pylint: disable=unexpected-keyword-arg + custom_context="Custom context value.", + extra_field_not_in_template="This should be excluded.", eval_case_id="case-123", ) payload = handler._build_request_payload(eval_case=eval_case, response_index=0) - expected_json_instance_dict = { - "prompt": "User prompt text", - "response": "Model response text", - "custom_context": "Custom context value.", - "reference": "Ground truth text", + expected_content_map = { + "prompt": _create_content_dump("User prompt text"), + "response": _create_content_dump("Model response text"), + "custom_context": _create_content_dump("Custom context value."), + "reference": _create_content_dump("Ground truth text"), } + actual_content_map_dict = payload["pointwise_metric_input"]["instance"][ + "content_map_instance" + ]["values"] - actual_json_instance_str = payload["pointwise_metric_input"]["instance"][ - "json_instance" - ] - actual_json_instance_dict = json.loads(actual_json_instance_str) - - assert actual_json_instance_dict == expected_json_instance_dict - assert "extra_field_not_in_template" not in actual_json_instance_dict - assert "eval_case_id" not in actual_json_instance_dict - - assert ( - "custom_output_format_config" - not in payload["pointwise_metric_input"]["metric_spec"] - ) - assert ( - "system_instruction" not in payload["pointwise_metric_input"]["metric_spec"] - ) - assert "autorater_config" not in payload + assert actual_content_map_dict == expected_content_map + assert "extra_field_not_in_template" not in actual_content_map_dict + assert "eval_case_id" not in actual_content_map_dict def test_build_request_payload_various_field_types(self): metric = vertexai_genai_types.LLMMetric( - name="complex_eval", - prompt_template=( - "P: {prompt}, R: {response}, Hist: {conversation_history}, " - "SysInstruct: {system_instruction}, " - "DictField: {dict_field}, ListField: {list_field}, " - "IntField: {int_field}, BoolField: {bool_field}" - ), + name="test_various_fields", + prompt_template="{prompt}{response}{conversation_history}{system_instruction}{dict_field}{list_field}{int_field}{bool_field}", ) handler = _evals_metric_handlers.LLMMetricHandler( module=self.mock_evals_module, metric=metric ) - eval_case = vertexai_genai_types.EvalCase( prompt=genai_types.Content(parts=[genai_types.Part(text="The Prompt")]), responses=[ @@ -2804,21 +2793,18 @@ def test_build_request_payload_various_field_types(self): system_instruction=genai_types.Content( parts=[genai_types.Part(text="System instructions here.")] ), - dict_field={ # pylint: disable=unexpected-keyword-arg - "key1": "val1", - "key2": [1, 2], - }, - list_field=["a", "b", {"c": 3}], # pylint: disable=unexpected-keyword-arg - int_field=42, # pylint: disable=unexpected-keyword-arg - bool_field=True, # pylint: disable=unexpected-keyword-arg + dict_field={"key1": "val1", "key2": [1, 2]}, + list_field=["a", "b", {"c": 3}], + int_field=42, + bool_field=True, ) payload = handler._build_request_payload(eval_case=eval_case, response_index=0) - 
actual_json_instance_dict = json.loads( - payload["pointwise_metric_input"]["instance"]["json_instance"] - ) + actual_content_map_dict = payload["pointwise_metric_input"]["instance"][ + "content_map_instance" + ]["values"] - expected_json_instance_dict = { + expected_texts = { "prompt": "The Prompt", "response": "The Response", "conversation_history": "user: Turn 1 user\nmodel: Turn 1 model", @@ -2828,16 +2814,20 @@ def test_build_request_payload_various_field_types(self): "int_field": "42", "bool_field": "True", } - assert actual_json_instance_dict == expected_json_instance_dict + expected_content_map = { + key: _create_content_dump(text) for key, text in expected_texts.items() + } + + assert actual_content_map_dict == expected_content_map def test_build_request_payload_optional_metric_configs_set(self): metric = vertexai_genai_types.LLMMetric( - name="configured_metric", - prompt_template="P: {prompt}, R: {response}", + name="test_optional_configs", + prompt_template="{prompt}{response}", + judge_model="gemini-1.5-pro", + judge_model_sampling_count=5, + judge_model_system_instruction="You are a fair judge.", return_raw_output=True, - judge_model_system_instruction="Be a fair judge.", - judge_model="gemini-pro", - judge_model_sampling_count=10, ) handler = _evals_metric_handlers.LLMMetricHandler( module=self.mock_evals_module, metric=metric @@ -2853,23 +2843,25 @@ def test_build_request_payload_optional_metric_configs_set(self): payload = handler._build_request_payload(eval_case=eval_case, response_index=0) - expected_json_instance = {"prompt": "p", "response": "r"} - actual_json_instance = json.loads( - payload["pointwise_metric_input"]["instance"]["json_instance"] - ) - assert actual_json_instance == expected_json_instance + expected_content_map = { + "prompt": _create_content_dump("p"), + "response": _create_content_dump("r"), + } + actual_content_map_dict = payload["pointwise_metric_input"]["instance"][ + "content_map_instance" + ]["values"] + assert actual_content_map_dict == expected_content_map metric_spec_payload = payload["pointwise_metric_input"]["metric_spec"] assert ( - metric_spec_payload["metric_prompt_template"] - == "P: {prompt}, R: {response}" + metric_spec_payload["custom_output_format_config"]["return_raw_output"] + is True ) - assert metric_spec_payload["custom_output_format_config"]["return_raw_output"] - assert metric_spec_payload["system_instruction"] == "Be a fair judge." + assert metric_spec_payload["system_instruction"] == "You are a fair judge." autorater_config_payload = payload["autorater_config"] - assert autorater_config_payload["autorater_model"] == "gemini-pro" - assert autorater_config_payload["sampling_count"] == 10 + assert autorater_config_payload["autorater_model"] == "gemini-1.5-pro" + assert autorater_config_payload["sampling_count"] == 5 def test_merge_with_invalid_prompt_type(self): raw_dataset_1 = [ diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py index d8f2d09553..a6f2d7c011 100644 --- a/vertexai/_genai/_evals_metric_handlers.py +++ b/vertexai/_genai/_evals_metric_handlers.py @@ -381,9 +381,11 @@ def process( "Comet result missing in API response for metric '%s'." 
" API response: %s", metric_name, - api_response.model_dump_json(exclude_none=True) - if api_response - else "None", + ( + api_response.model_dump_json(exclude_none=True) + if api_response + else "None" + ), ) elif metric_name == "metricx": if api_response and api_response.metricx_result: @@ -393,28 +395,34 @@ def process( "MetricX result missing in API response for metric '%s'." " API response: %s", metric_name, - api_response.model_dump_json(exclude_none=True) - if api_response - else "None", + ( + api_response.model_dump_json(exclude_none=True) + if api_response + else "None" + ), ) if score is None and not error_message: logger.warning( "Score could not be extracted for translation metric '%s'." " API response: %s", metric_name, - api_response.model_dump_json(exclude_none=True) - if api_response - else "None", + ( + api_response.model_dump_json(exclude_none=True) + if api_response + else "None" + ), ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logger.error( "Error processing/extracting score for translation metric '%s': %s." " API response: %s", metric_name, e, - api_response.model_dump_json(exclude_none=True) - if api_response - else "None", + ( + api_response.model_dump_json(exclude_none=True) + if api_response + else "None" + ), exc_info=True, ) error_message = f"Error extracting score: {e}" @@ -440,135 +448,202 @@ class LLMMetricHandler(MetricHandler): def __init__(self, module: "evals.Evals", metric: types.LLMMetric): super().__init__(module=module, metric=metric) - def _build_request_payload( - self, eval_case: types.EvalCase, response_index: int + def _build_rubric_based_input( + self, eval_case: types.EvalCase, response_content: genai_types.Content ) -> dict[str, Any]: - """Builds the request parameters for evaluate instances request.""" - request_payload = {} - if response_index >= len(eval_case.responses): - raise IndexError( - f"response_index {response_index} out of bounds for eval_case with" - f" {len(eval_case.responses)} responses." - ) - if eval_case.responses is None: - raise ValueError( - f"No responses found for eval_case with ID {eval_case.eval_case_id}." - ) - current_response_candidate = eval_case.responses[response_index] + """Builds the payload for a rubric-based LLM metric.""" + eval_case_dict = eval_case.model_dump(exclude={"responses"}) + # TODO: b/414660471 - add rubric_groups to eval_case type definition. + rubric_groups_data = eval_case_dict.get("rubric_groups") - prompt_text = _extract_text_from_content(eval_case.prompt) - if prompt_text is None: + if not isinstance(rubric_groups_data, dict): raise ValueError( - f"Prompt text missing for eval_case " - f"{eval_case.eval_case_id or 'Unknown ID'}." + f"Dataset column 'rubric_groups' for case {eval_case.eval_case_id} " + "must be a dictionary." ) - response_text = _extract_text_from_content(current_response_candidate.response) - if response_text is None: - raise ValueError( - f"Response text missing for candidate {response_index} in eval_case" - f" {eval_case.eval_case_id or 'Unknown ID'}." 
+ rubrics_list = rubric_groups_data.get(self.metric.rubric_group_name, []) + if not isinstance(rubrics_list, list): + logger.warning( + "Rubric group '%s' in 'rubric_groups' is not a list for case %s.", + self.metric.rubric_group_name, + eval_case.eval_case_id, ) + rubrics_list = [] + + rubric_enhanced_contents = { + "prompt": [eval_case.prompt.model_dump(mode="json", exclude_none=True)], + "response": [response_content.model_dump(mode="json", exclude_none=True)], + "rubric_groups": { + self.metric.rubric_group_name: { + "rubrics": [ + r.model_dump(mode="json") if isinstance(r, types.Rubric) else r + for r in rubrics_list + ] + } + }, + } + + metric_spec_payload = { + "metric_prompt_template": self.metric.prompt_template, + "rubric_group_key": self.metric.rubric_group_name, + } - instance_data_for_json = { - "prompt": prompt_text, - "response": response_text, + return { + "rubric_based_metric_input": { + "metric_spec": metric_spec_payload, + "instance": {"rubric_enhanced_contents": rubric_enhanced_contents}, + } } + def _build_pointwise_input( + self, eval_case: types.EvalCase, response_content: genai_types.Content + ) -> dict[str, Any]: + """Builds the payload for a standard pointwise LLM metric.""" + instance_data = { + "prompt": eval_case.prompt, + "response": response_content, + } template_obj = types.PromptTemplate(text=self.metric.prompt_template) - required_vars_from_template = template_obj.variables - eval_case_all_data = eval_case.model_dump(exclude_none=True, by_alias=False) + required_vars = template_obj.variables - set(instance_data.keys()) + for var_name in required_vars: + if hasattr(eval_case, var_name): + instance_data[var_name] = getattr(eval_case, var_name) + + content_map_values = {} + for key, value in instance_data.items(): + content_list_to_serialize = [] + if isinstance(value, genai_types.Content): + content_list_to_serialize = [value] + elif isinstance(value, types.ResponseCandidate): + if value.response: + content_list_to_serialize = [value.response] + elif isinstance(value, list) and value: + if isinstance(value[0], genai_types.Content): + content_list_to_serialize = value + elif isinstance(value[0], types.Message): + history_texts = [] + for msg_obj in value: + msg_text = _extract_text_from_content(msg_obj.content) + if msg_text: + role = msg_obj.content.role or msg_obj.author or "user" + history_texts.append(f"{role}: {msg_text}") + content_list_to_serialize = [ + types.Content(parts=[types.Part(text="\n".join(history_texts))]) + ] + else: + content_list_to_serialize = [ + types.Content(parts=[types.Part(text=json.dumps(value))]) + ] + elif isinstance(value, dict): + content_list_to_serialize = [ + types.Content(parts=[types.Part(text=json.dumps(value))]) + ] + else: + content_list_to_serialize = [ + types.Content(parts=[types.Part(text=str(value))]) + ] - for var_name in required_vars_from_template: - if var_name in instance_data_for_json: - continue + content_map_values[key] = types.ContentMapContents( + contents=content_list_to_serialize + ) - if var_name in eval_case_all_data: - original_attr_value = getattr(eval_case, var_name, None) + instance_payload = types.PointwiseMetricInstance( + content_map_instance=types.ContentMap(values=content_map_values) + ) - if isinstance(original_attr_value, genai_types.Content): - extracted_text = _extract_text_from_content(original_attr_value) - if extracted_text is not None: - instance_data_for_json[var_name] = extracted_text - elif isinstance(original_attr_value, types.ResponseCandidate): - extracted_text = 
_extract_text_from_content( - original_attr_value.response - ) - if extracted_text is not None: - instance_data_for_json[var_name] = extracted_text - elif ( - isinstance(original_attr_value, list) - and original_attr_value - and isinstance(original_attr_value[0], types.Message) - ): - history_texts = [] - for _, msg_obj in enumerate(original_attr_value): - if msg_obj.content: - msg_text = _extract_text_from_content(msg_obj.content) - if msg_text: - role = msg_obj.content.role or msg_obj.author or "user" - history_texts.append(f"{role}: {msg_text}") - instance_data_for_json[var_name] = ( - "\n".join(history_texts) if history_texts else "" - ) - elif eval_case_all_data[var_name] is not None: - value_from_dump = eval_case_all_data[var_name] - if isinstance(value_from_dump, (dict, list)): - instance_data_for_json[var_name] = json.dumps(value_from_dump) - else: - instance_data_for_json[var_name] = str(value_from_dump) - - request_payload["pointwise_metric_input"] = { - "metric_spec": {"metric_prompt_template": self.metric.prompt_template}, - "instance": {"json_instance": json.dumps(instance_data_for_json)}, - } - metric_spec_payload = request_payload["pointwise_metric_input"]["metric_spec"] + metric_spec_payload = {"metric_prompt_template": self.metric.prompt_template} if self.metric.return_raw_output is not None: - metric_spec_payload["custom_output_format_config"] = { # type: ignore[index] - "return_raw_output": self.metric.return_raw_output, + metric_spec_payload["custom_output_format_config"] = { + "return_raw_output": self.metric.return_raw_output } - if self.metric.judge_model_system_instruction is not None: - metric_spec_payload[ # type: ignore[index] + if self.metric.judge_model_system_instruction: + metric_spec_payload[ "system_instruction" ] = self.metric.judge_model_system_instruction - autorater_config_payload = {} - if self.metric.judge_model is not None: - autorater_config_payload["autorater_model"] = self.metric.judge_model - if self.metric.judge_model_sampling_count is not None: - autorater_config_payload[ - "sampling_count" - ] = self.metric.judge_model_sampling_count # type: ignore[assignment] - if autorater_config_payload: - request_payload["autorater_config"] = autorater_config_payload # type: ignore[assignment] + return { + "pointwise_metric_input": { + "metric_spec": metric_spec_payload, + "instance": instance_payload.model_dump(mode="json", exclude_none=True), + } + } - logger.debug("request_payload: %s", request_payload) + def _add_autorater_config(self, payload: dict[str, Any]): + """Adds autorater config to the request payload if specified.""" + autorater_config = {} + if self.metric.judge_model: + autorater_config["autorater_model"] = self.metric.judge_model + if self.metric.judge_model_sampling_count: + autorater_config["sampling_count"] = self.metric.judge_model_sampling_count + + if not autorater_config: + return + + if "rubric_based_metric_input" in payload: + spec = payload["rubric_based_metric_input"]["metric_spec"] + if "judge_autorater_config" not in spec: + spec["judge_autorater_config"] = {} + spec["judge_autorater_config"].update(autorater_config) + else: + payload["autorater_config"] = autorater_config - return request_payload + def _build_request_payload( + self, eval_case: types.EvalCase, response_index: int + ) -> dict[str, Any]: + """Builds the request parameters for evaluate instances request.""" + if not eval_case.responses or response_index >= len(eval_case.responses): + raise IndexError(f"response_index {response_index} is out of bounds.") + + 
response_content = eval_case.responses[response_index].response + if not response_content: + raise ValueError( + f"Response content missing for candidate {response_index}." + ) + + if self.metric.rubric_group_name: + payload = self._build_rubric_based_input(eval_case, response_content) + else: + payload = self._build_pointwise_input(eval_case, response_content) + + self._add_autorater_config(payload) + return payload @override def process( self, eval_case: types.EvalCase, response_index: int ) -> types.EvalCaseMetricResult: + """Processes a single evaluation case for a specific LLM metric.""" metric_name = self.metric.name - logger.debug( - "LLMMetricHandler: Processing '%s' for case: %s", - metric_name, - eval_case.model_dump(exclude_none=True), - ) - response = self.module.evaluate_instances( - metric_config=self._build_request_payload(eval_case, response_index) - ) + try: + payload = self._build_request_payload(eval_case, response_index) + response = self.module.evaluate_instances(metric_config=payload) - return types.EvalCaseMetricResult( - metric_name=self.metric.name, - score=response.pointwise_metric_result.score - if response.pointwise_metric_result - else None, - explanation=response.pointwise_metric_result.explanation - if response.pointwise_metric_result - else None, - ) + if self.metric.rubric_group_name: + result_data = response.rubric_based_metric_result + return types.EvalCaseMetricResult( + metric_name=metric_name, + score=result_data.score if result_data else None, + rubric_verdicts=result_data.rubric_verdicts if result_data else [], + ) + else: + result_data = response.pointwise_metric_result + return types.EvalCaseMetricResult( + metric_name=metric_name, + score=result_data.score if result_data else None, + explanation=result_data.explanation if result_data else None, + ) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error( + "Error processing metric %s for case %s: %s", + metric_name, + eval_case.eval_case_id, + e, + exc_info=True, + ) + return types.EvalCaseMetricResult( + metric_name=metric_name, error_message=str(e) + ) @override def aggregate( @@ -608,7 +683,7 @@ def aggregate( metric_name=self.metric.name, **final_summary_dict, ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logger.error( "Error executing custom aggregate_summary_fn for metric '%s': %s." 
" Falling back to default aggregation.", @@ -702,13 +777,13 @@ def process( f" unexpected type {type(custom_function_result)}" ) - except Exception as e: - custom_function_name = ( - self.metric.custom_function.__name__ - if self.metric.custom_function - and hasattr(self.metric.custom_function, "__name__") - else "unknown_custom_function" - ) + except Exception as e: # pylint: disable=broad-exception-caught + if self.metric.custom_function and hasattr( + self.metric.custom_function, "__name__" + ): + custom_function_name = self.metric.custom_function.__name__ + else: + custom_function_name = "unknown_custom_function" error_msg = f"CustomFunctionError({custom_function_name}): {e}" score = None explanation = None @@ -940,8 +1015,8 @@ def compute_metrics_and_aggregate( ) future.add_done_callback(lambda _: pbar.update(1)) logger.debug( - "Submitting metric computation for case %d, response %d for" - " metric %s.", + "Submitting metric computation for case %d, " + "response %d for metric %s.", eval_case_index, response_index, metric_handler_instance.metric.name, @@ -956,8 +1031,8 @@ def compute_metrics_and_aggregate( ) except Exception as e: # pylint: disable=broad-exception-caught logger.error( - "Error submitting metric computation for case %d, response %d" - " for metric %s: %s", + "Error submitting metric computation for case %d, " + "response %d for metric %s: %s", eval_case_index, response_index, metric_handler_instance.metric.name, @@ -1002,7 +1077,7 @@ def compute_metrics_and_aggregate( eval_case_index, response_index, ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logger.error( "Error executing metric '%s' for case %s, response %s: %s", metric_name, @@ -1094,9 +1169,5 @@ def compute_metrics_and_aggregate( try: eval_result.win_rates = calculate_win_rates(eval_result) except Exception as e: # pylint: disable=broad-exception-caught - logger.error( - "Error calculating win rates: %s", - e, - exc_info=True, - ) + logger.error("Error calculating win rates: %s", e, exc_info=True) return eval_result diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index cc5b217b04..5c5940b16b 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -205,6 +205,13 @@ def _PointwiseMetricInstance_to_vertex( if getv(from_object, ["json_instance"]) is not None: setv(to_object, ["jsonInstance"], getv(from_object, ["json_instance"])) + if getv(from_object, ["content_map_instance"]) is not None: + setv( + to_object, + ["contentMapInstance"], + getv(from_object, ["content_map_instance"]), + ) + return to_object @@ -1196,6 +1203,113 @@ def batch_evaluate( return return_value + def generate_rubrics( + self, + *, + src: Union[str, "pd.DataFrame"], + prompt_template: str, + rubric_group_name: str, + generator_model_config: Optional["types.AutoraterConfigOrDict"] = None, + rubric_content_type: Optional["types.RubricContentType"] = None, + rubric_type_ontology: Optional[list[str]] = None, + config: Optional[types.RubricGenerationConfigOrDict] = None, + ) -> "pd.DataFrame": + """Generates rubrics for each prompt in the source and adds them as a new column + + structured as a dictionary. + + Args: + src: The source of the prompts. Can be a string (path to a local + file, a GCS path, or a BigQuery table) or a Pandas DataFrame. The + loaded data must contain either a 'prompt' column (for text) or a + 'request' column (for text or multimodal Gemini Content). + prompt_template: Template for the rubric generation prompt. 
If using + a 'prompt' column, use {prompt} as the placeholder. If using a + 'request' column, this template is passed to the service along + with the content. + rubric_group_name: Name for the key within the dictionary in the new + column. + generator_model_config: Optional. Configuration for the model used + in rubric generation, including the model name (e.g., + "gemini-1.5-flash") within the 'autorater_model' field. + rubric_content_type: Optional. The type of rubric content to be + generated. + rubric_type_ontology: Optional. A pre-defined list of allowed types + for generated rubrics. + + Returns: + DataFrame with an added column named `rubric_groups`. Each cell in + this column contains a dictionary like: {rubric_group_name: + [list[Rubric]]}. + """ + try: + prompts_df = _evals_common._load_dataframe(self._api_client, src) + except Exception as e: + raise ValueError( + f"Failed to load prompt dataset from source: {src}. Error: {e}" + ) + + if "prompt" not in prompts_df.columns and "request" not in prompts_df.columns: + raise ValueError("Loaded dataset must have a 'prompt' or 'request' column.") + if not rubric_group_name: + raise ValueError("rubric_group_name cannot be empty.") + + input_column = "request" if "request" in prompts_df.columns else "prompt" + logger.info( + f"Generating rubrics for {len(prompts_df)} prompts from column" + f" '{input_column}', group: '{rubric_group_name}'..." + ) + all_rubric_groups = [] + + spec_dict = { + "prompt_template": prompt_template, + "rubric_content_type": rubric_content_type, + "rubric_type_ontology": rubric_type_ontology, + "generator_model_config": generator_model_config, + } + spec_dict = {k: v for k, v in spec_dict.items() if v is not None} + spec = types.RubricGenerationSpec.model_validate(spec_dict) + + for _, row in prompts_df.iterrows(): + input_data = row[input_column] + if isinstance(input_data, str): + contents = [types.Content(parts=[types.Part(text=input_data)])] + elif isinstance(input_data, list): + contents = input_data + else: + logger.warning( + "Skipping row: Unexpected input format in column" + f" '{input_column}'." + ) + all_rubric_groups.append({rubric_group_name: []}) + continue + + current_spec = spec.model_copy(deep=True) + + try: + response = self._generate_rubrics( + contents=contents, + rubric_generation_spec=current_spec, + config=config, + ) + rubric_group = {rubric_group_name: response.generated_rubrics} + all_rubric_groups.append(rubric_group) + except Exception as e: + logger.error( + "Rubric generation failed for input: %s... Error: %s", + str(input_data)[:50], + e, + ) + all_rubric_groups.append({rubric_group_name: []}) + + prompts_with_rubrics = prompts_df.copy() + prompts_with_rubrics["rubric_groups"] = all_rubric_groups + logger.info( + "Rubric generation complete. Added column 'rubric_groups' with key" + f" '{rubric_group_name}'." 
+ ) + return prompts_with_rubrics + class AsyncEvals(_api_module.BaseModule): async def _evaluate_instances( diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py index 3e6055515f..374c51445d 100644 --- a/vertexai/_genai/types.py +++ b/vertexai/_genai/types.py @@ -473,6 +473,24 @@ class RougeInputDict(TypedDict, total=False): RougeInputOrDict = Union[RougeInput, RougeInputDict] +class ContentMap(_common.BaseModel): + """Map of placeholder in metric prompt template to contents of model input.""" + + values: Optional[dict[str, "ContentMapContents"]] = Field( + default=None, description="""Map of placeholder to contents.""" + ) + + +class ContentMapDict(TypedDict, total=False): + """Map of placeholder in metric prompt template to contents of model input.""" + + values: Optional[dict[str, "ContentMapContents"]] + """Map of placeholder to contents.""" + + +ContentMapOrDict = Union[ContentMap, ContentMapDict] + + class PointwiseMetricInstance(_common.BaseModel): """Pointwise metric instance.""" @@ -480,6 +498,10 @@ class PointwiseMetricInstance(_common.BaseModel): default=None, description="""Instance specified as a json string. String key-value pairs are expected in the json_instance to render PointwiseMetricSpec.instance_prompt_template.""", ) + content_map_instance: Optional[ContentMap] = Field( + default=None, + description="""Key-value contents for the mutlimodality input, including text, image, video, audio, and pdf, etc. The key is placeholder in metric prompt template, and the value is the multimodal content.""", + ) class PointwiseMetricInstanceDict(TypedDict, total=False): @@ -488,6 +510,9 @@ class PointwiseMetricInstanceDict(TypedDict, total=False): json_instance: Optional[str] """Instance specified as a json string. String key-value pairs are expected in the json_instance to render PointwiseMetricSpec.instance_prompt_template.""" + content_map_instance: Optional[ContentMapDict] + """Key-value contents for the mutlimodality input, including text, image, video, audio, and pdf, etc. The key is placeholder in metric prompt template, and the value is the multimodal content.""" + PointwiseMetricInstanceOrDict = Union[ PointwiseMetricInstance, PointwiseMetricInstanceDict @@ -1313,24 +1338,6 @@ class RubricBasedMetricSpecDict(TypedDict, total=False): RubricBasedMetricSpecOrDict = Union[RubricBasedMetricSpec, RubricBasedMetricSpecDict] -class ContentMap(_common.BaseModel): - """Map of placeholder in metric prompt template to contents of model input.""" - - values: Optional[dict[str, list[genai_types.Content]]] = Field( - default=None, description="""Map of placeholder to contents.""" - ) - - -class ContentMapDict(TypedDict, total=False): - """Map of placeholder in metric prompt template to contents of model input.""" - - values: Optional[dict[str, list[genai_types.Content]]] - """Map of placeholder to contents.""" - - -ContentMapOrDict = Union[ContentMap, ContentMapDict] - - class RubricEnhancedContents(_common.BaseModel): """Rubric-enhanced contents for evaluation.""" @@ -5890,6 +5897,11 @@ def to_yaml_file(self, file_path: str, version: Optional[str] = None) -> None: class LLMMetric(Metric): """A metric that uses LLM-as-a-judge for evaluation.""" + rubric_group_name: Optional[str] = Field( + default=None, + description="""Optional. 
The name of the column in the EvaluationDataset containing the list of rubrics to use for this metric.""", + ) + @field_validator("prompt_template", mode="before") @classmethod def validate_prompt_template(cls, value: Union[str, "MetricPromptBuilder"]) -> str: @@ -6557,6 +6569,24 @@ class EvaluationResultDict(TypedDict, total=False): EvaluationResultOrDict = Union[EvaluationResult, EvaluationResultDict] +class ContentMapContents(_common.BaseModel): + """Map of placeholder in metric prompt template to contents of model input.""" + + contents: Optional[list[genai_types.Content]] = Field( + default=None, description="""Contents of the model input.""" + ) + + +class ContentMapContentsDict(TypedDict, total=False): + """Map of placeholder in metric prompt template to contents of model input.""" + + contents: Optional[list[genai_types.Content]] + """Contents of the model input.""" + + +ContentMapContentsOrDict = Union[ContentMapContents, ContentMapContentsDict] + + class EvaluateMethodConfig(_common.BaseModel): """Optional parameters for the evaluate method.""" From 7764d9851db8aa52c0e18894254768ce479dd616 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 12:28:17 -0400 Subject: [PATCH 13/13] chore(main): release 1.104.0 (#5537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 38 +++++++++++++++++++ google/cloud/aiplatform/gapic_version.py | 2 +- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1/gapic_version.py | 2 +- .../v1/schema/predict/params/gapic_version.py | 2 +- .../schema/predict/params_v1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../predict/prediction_v1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1/gapic_version.py | 2 +- .../schema/predict/instance/gapic_version.py | 2 +- .../predict/instance_v1beta1/gapic_version.py | 2 +- .../schema/predict/params/gapic_version.py | 2 +- .../predict/params_v1beta1/gapic_version.py | 2 +- .../predict/prediction/gapic_version.py | 2 +- .../prediction_v1beta1/gapic_version.py | 2 +- .../trainingjob/definition/gapic_version.py | 2 +- .../definition_v1beta1/gapic_version.py | 2 +- google/cloud/aiplatform/version.py | 2 +- google/cloud/aiplatform_v1/gapic_version.py | 2 +- .../cloud/aiplatform_v1beta1/gapic_version.py | 2 +- pypi/_vertex_ai_placeholder/version.py | 2 +- ...t_metadata_google.cloud.aiplatform.v1.json | 2 +- ...adata_google.cloud.aiplatform.v1beta1.json | 2 +- 25 files changed, 62 insertions(+), 24 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c7af9b7b20..b4c413218b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.103.0" + ".": "1.104.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d8e6e19dd..bc469e827a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## [1.104.0](https://github.com/googleapis/python-aiplatform/compare/v1.103.0...v1.104.0) (2025-07-15) + + +### Features + +* Add Aggregation Output in EvaluateDataset Get Operation Response ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194)) +* Add API for Managed OSS Fine Tuning ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194)) +* Add flexstart option to v1beta1 
([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* Expose task_unique_name in pipeline task details for pipeline rerun ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* GenAI SDK client - Add support for context specs when creating agent engine instances ([8321826](https://github.com/googleapis/python-aiplatform/commit/8321826fe3c18b8b938861fe0930aa7ec4e97fa3))
+* GenAI SDK client(evals) - Add Generate Rubrics API config and internal method ([6727fb3](https://github.com/googleapis/python-aiplatform/commit/6727fb3761e91d32359e13b1c28494d2f16d165a))
+* GenAI SDK client(evals) - add rubric-based evaluation types ([df2390e](https://github.com/googleapis/python-aiplatform/commit/df2390e881b06629da29adb21a69c8cc68585aba))
+* GenAI SDK client(evals) - Add support for rubric-based metrics, and rubric customization eval workflow ([36bfda2](https://github.com/googleapis/python-aiplatform/commit/36bfda246eeb2b2a171cec9fb2602f4802601b7d))
+* Some comments changes in machine_resources.proto to v1beta1 ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* Vertex AI Model Garden custom model deploy Public Preview ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+
+
+### Bug Fixes
+
+* GenAI SDK client(evals) - Handle optional pandas dependency in type hints ([cee8d8b](https://github.com/googleapis/python-aiplatform/commit/cee8d8b85f00efb259dbee5e2fa6d0cbed73e24c))
+
+
+### Documentation
+
+* A comment for field `boot_disk_type` in message `.google.cloud.aiplatform.v1beta1.DiskSpec` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `learning_rate_multiplier` in message `.google.cloud.aiplatform.v1beta1.SupervisedHyperParameters` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `machine_spec` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `max_replica_count` in message `.google.cloud.aiplatform.v1beta1.AutomaticResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `max_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `min_replica_count` in message `.google.cloud.aiplatform.v1beta1.AutomaticResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `min_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `model` in message `.google.cloud.aiplatform.v1beta1.TunedModel` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `required_replica_count` in message `.google.cloud.aiplatform.v1beta1.DedicatedResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `training_dataset_uri` in message `.google.cloud.aiplatform.v1beta1.SupervisedTuningSpec` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for field `validation_dataset_uri` in message `.google.cloud.aiplatform.v1beta1.SupervisedTuningSpec` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* A comment for message `DedicatedResources` is changed ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+* Add constraints for AggregationMetric enum and default value for flip_enabled field in AutoraterConfig ([43eee8d](https://github.com/googleapis/python-aiplatform/commit/43eee8de3a6cbcf5e74a1272565b5307e882d194))
+
 ## [1.103.0](https://github.com/googleapis/python-aiplatform/compare/v1.102.0...v1.103.0) (2025-07-10)
diff --git a/google/cloud/aiplatform/gapic_version.py b/google/cloud/aiplatform/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/gapic_version.py
+++ b/google/cloud/aiplatform/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py
index 5eadcf7592..33de5ce1f5 100644
--- a/google/cloud/aiplatform/version.py
+++ b/google/cloud/aiplatform/version.py
@@ -15,4 +15,4 @@
 # limitations under the License.
 #

-__version__ = "1.103.0"
+__version__ = "1.104.0"
diff --git a/google/cloud/aiplatform_v1/gapic_version.py b/google/cloud/aiplatform_v1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform_v1/gapic_version.py
+++ b/google/cloud/aiplatform_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/google/cloud/aiplatform_v1beta1/gapic_version.py b/google/cloud/aiplatform_v1beta1/gapic_version.py
index aee4ab459f..355ce7b50e 100644
--- a/google/cloud/aiplatform_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.103.0" # {x-release-please-version}
+__version__ = "1.104.0" # {x-release-please-version}
diff --git a/pypi/_vertex_ai_placeholder/version.py b/pypi/_vertex_ai_placeholder/version.py
index 513c7149ee..a47bcbab6e 100644
--- a/pypi/_vertex_ai_placeholder/version.py
+++ b/pypi/_vertex_ai_placeholder/version.py
@@ -15,4 +15,4 @@
 # limitations under the License.
 #

-__version__ = "1.103.0"
+__version__ = "1.104.0"
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
index 8dc7f2091f..cb600b6caa 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-aiplatform",
-    "version": "0.1.0"
+    "version": "1.104.0"
   },
   "snippets": [
     {
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
index a789b54e19..d5f3fc65d8 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
    "name": "google-cloud-aiplatform",
-    "version": "0.1.0"
+    "version": "1.104.0"
   },
   "snippets": [
     {