From bbd5af30aa9f3165fa4602161f7b190972f27813 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 2 Oct 2024 09:47:52 -0700 Subject: [PATCH 1/6] Image gen (#549) * Add more enum samples Change-Id: I743d5967cc1cc91576b8ddf5a60db1767d94508d * format Change-Id: I8f6f9389f1cae0a7c934217968d4e2e20bb9590e * From: https://github.com/googleapis/python-aiplatform/tree/v1.65.0/vertexai/vision_models Change-Id: I693579ccf2994212f25d0354d091d3210fbf3212 * clear out extra classes Change-Id: I64dec6a71f3f04fa834f2174e4a6b2d4740f4e90 * remove gcs_uri Change-Id: Ieafe776bde8e79e1747ab6cf252b49a001ab66bf * IPython reprs Change-Id: I586876f524684dc3d0ee0ea8510b56bc5153642b * remove output_gce_uri Change-Id: Ic4424d06a705bf94f0fade34468160566cb3a6be * remove IPython reprs Change-Id: I474c76c1e73d7a92a653932d1984c02de7e3b71a * remove more gcs_uri. Change-Id: I9f6b8c879dc8f38c5193ddb1e271a62984c7f020 * handle instances converversion to Value protos. Change-Id: Id33f8d2d6a4cffbfb7b0d37955cc800a867a70d5 * Basically works. Change-Id: I28364ab70b2a263b29026f2cf2d1d4f807d88f53 * add _repr_png_ Change-Id: I436170460e17983637283d0086ebc232c9c425ce * Remoive "add watermark" switch. Change-Id: I6580ae1b508a458c9813fdf161a08329cd676c08 * Remove seed (it doesn't work without watermark), +fix upscale Change-Id: I4c394cd861d2646cd663224f0bbeec52580bc0bd * remove edit and upscale Change-Id: Ic9c270279ee020baef2c3b2117199ff17b066d88 * remove upscale Change-Id: I84f3b603732c3c1e91da9d5abf38c332b24772cb * skip bad test Change-Id: Ief70d4fdc9d7478b402cf2f790817f98e336b25a * format Change-Id: I4d00cad9d0e6485f5b710d2ffcec82a9b738e2b0 * check enums Change-Id: I8e7b92fc15d3941b7fa74a97b95d2577be9d6c1f * Remove * unpackng Change-Id: Iddc42b906dfab12dfa500f0f9774d58521684548 * ignore typing errors, these are incorrect Change-Id: Idc4e594811e52aa3d17562851ef02808e6dd5633 * fix typing? Change-Id: I5005dd38b7d3a03edc1c54d42f3a6c8f959d7274 * fix lazy loading with typing Change-Id: I2ebb3afbe3df93f918ffe5f66dac20392665c6cc * Revert "fix lazy loading with typing" This reverts commit 0baabfe01d3598c9b0f54e2d6e0ceef12ba5f8c3. * Use if TYPE_CHECKING Change-Id: I63c4c7909f7d7060a4b557f41cff4ba9010e004c * format Change-Id: I446692905361c56505b070d72d94ea7fecdaede8 --- google/generativeai/__init__.py | 2 + google/generativeai/client.py | 8 + google/generativeai/generative_models.py | 2 +- google/generativeai/vision_models/__init__.py | 29 + .../vision_models/_vision_models.py | 522 ++++++++++++++++++ samples/controlled_generation.py | 4 +- tests/test_async_code_match.py | 1 + 7 files changed, 565 insertions(+), 3 deletions(-) create mode 100644 google/generativeai/vision_models/__init__.py create mode 100644 google/generativeai/vision_models/_vision_models.py diff --git a/google/generativeai/__init__.py b/google/generativeai/__init__.py index 5b143d768..73025a1b4 100644 --- a/google/generativeai/__init__.py +++ b/google/generativeai/__init__.py @@ -59,6 +59,8 @@ from google.generativeai.generative_models import GenerativeModel from google.generativeai.generative_models import ChatSession +from google.generativeai.vision_models import * + from google.generativeai.models import list_models from google.generativeai.models import list_tuned_models diff --git a/google/generativeai/client.py b/google/generativeai/client.py index d2eb6b1c9..a75643f1a 100644 --- a/google/generativeai/client.py +++ b/google/generativeai/client.py @@ -384,3 +384,11 @@ def get_default_permission_client() -> glm.PermissionServiceClient: def get_default_permission_async_client() -> glm.PermissionServiceAsyncClient: return _client_manager.get_default_client("permission_async") + + +def get_default_prediction_client() -> glm.PermissionServiceClient: + return _client_manager.get_default_client("prediction") + + +def get_default_prediction_async_client() -> glm.PermissionServiceAsyncClient: + return _client_manager.get_default_client("prediction_async") diff --git a/google/generativeai/generative_models.py b/google/generativeai/generative_models.py index 134430b2e..dcda3135a 100644 --- a/google/generativeai/generative_models.py +++ b/google/generativeai/generative_models.py @@ -4,7 +4,7 @@ from collections.abc import Iterable import textwrap -from typing import Any, Union, overload +from typing import Any, Literal, Union, overload import reprlib # pylint: disable=bad-continuation, line-too-long diff --git a/google/generativeai/vision_models/__init__.py b/google/generativeai/vision_models/__init__.py new file mode 100644 index 000000000..65a545831 --- /dev/null +++ b/google/generativeai/vision_models/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Classes for working with vision models.""" + +from google.generativeai.vision_models._vision_models import ( + Image, + GeneratedImage, + ImageGenerationModel, + ImageGenerationResponse, +) + +__all__ = [ + "Image", + "GeneratedImage", + "ImageGenerationModel", + "ImageGenerationResponse", +] diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py new file mode 100644 index 000000000..0bb4f7dbe --- /dev/null +++ b/google/generativeai/vision_models/_vision_models.py @@ -0,0 +1,522 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=bad-continuation, line-too-long, protected-access +"""Classes for working with vision models.""" + +import base64 +import collections +import dataclasses +import hashlib +import io +import json +import pathlib +import typing +from typing import Any, Dict, List, Literal, Optional, Union + +from google.generativeai import client +from google.generativeai import protos + +from google.protobuf import struct_pb2 + +from proto.marshal.collections import maps +from proto.marshal.collections import repeated + + +# pylint: disable=g-import-not-at-top\ +if typing.TYPE_CHECKING: + from IPython import display as IPython_display +else: + try: + from IPython import display as IPython_display + except ImportError: + IPython_display = None + +if typing.TYPE_CHECKING: + import PIL.Image as PIL_Image +else: + try: + from PIL import Image as PIL_Image + except ImportError: + PIL_Image = None + + +# This is to get around https://github.com/googleapis/proto-plus-python/issues/488 +def to_value(value) -> struct_pb2.Value: + """Return a protobuf Value object representing this value.""" + if isinstance(value, struct_pb2.Value): + return value + if value is None: + return struct_pb2.Value(null_value=0) + if isinstance(value, bool): + return struct_pb2.Value(bool_value=value) + if isinstance(value, (int, float)): + return struct_pb2.Value(number_value=float(value)) + if isinstance(value, str): + return struct_pb2.Value(string_value=value) + if isinstance(value, collections.abc.Sequence): + return struct_pb2.Value(list_value=to_list_value(value)) + if isinstance(value, collections.abc.Mapping): + return struct_pb2.Value(struct_value=to_mapping_value(value)) + raise ValueError("Unable to coerce value: %r" % value) + + +def to_list_value(value) -> struct_pb2.ListValue: + # We got a proto, or else something we sent originally. + # Preserve the instance we have. + if isinstance(value, struct_pb2.ListValue): + return value + if isinstance(value, repeated.RepeatedComposite): + return struct_pb2.ListValue(values=[v for v in value.pb]) + + # We got a list (or something list-like); convert it. + return struct_pb2.ListValue(values=[to_value(v) for v in value]) + + +def to_mapping_value(value) -> struct_pb2.Struct: + # We got a proto, or else something we sent originally. + # Preserve the instance we have. + if isinstance(value, struct_pb2.Struct): + return value + if isinstance(value, maps.MapComposite): + return struct_pb2.Struct( + fields={k: v for k, v in value.pb.items()}, + ) + + # We got a dict (or something dict-like); convert it. + return struct_pb2.Struct(fields={k: to_value(v) for k, v in value.items()}) + + +AspectRatio = Literal["1:1", "9:16", "16:9", "4:3", "3:4"] +ASPECT_RATIOS = AspectRatio.__args__ # type: ignore + +OutputMimeType = Literal["image/png", "image/jpeg"] +OUTPUT_MIME_TYPES = OutputMimeType.__args__ # type: ignore + +SafetyFilterLevel = Literal["block_most", "block_some", "block_few", "block_fewest"] +SAFETY_FILTER_LEVELS = SafetyFilterLevel.__args__ # type: ignore + +PersonGeneration = Literal["dont_allow", "allow_adult", "allow_all"] +PERSON_GENERATIONS = PersonGeneration.__args__ # type: ignore + + +class Image: + """Image.""" + + __module__ = "vertexai.vision_models" + + _loaded_bytes: Optional[bytes] = None + _loaded_image: Optional["PIL_Image.Image"] = None + + def __init__( + self, + image_bytes: Optional[bytes], + ): + """Creates an `Image` object. + + Args: + image_bytes: Image file bytes. Image can be in PNG or JPEG format. + """ + self._image_bytes = image_bytes + + @staticmethod + def load_from_file(location: str) -> "Image": + """Loads image from local file or Google Cloud Storage. + + Args: + location: Local path or Google Cloud Storage uri from where to load + the image. + + Returns: + Loaded image as an `Image` object. + """ + # Load image from local path + image_bytes = pathlib.Path(location).read_bytes() + image = Image(image_bytes=image_bytes) + return image + + @property + def _image_bytes(self) -> bytes: + return self._loaded_bytes + + @_image_bytes.setter + def _image_bytes(self, value: bytes): + self._loaded_bytes = value + + @property + def _pil_image(self) -> "PIL_Image.Image": # type: ignore + if self._loaded_image is None: + if not PIL_Image: + raise RuntimeError( + "The PIL module is not available. Please install the Pillow package." + ) + self._loaded_image = PIL_Image.open(io.BytesIO(self._image_bytes)) + return self._loaded_image + + @property + def _size(self): + return self._pil_image.size + + @property + def _mime_type(self) -> str: + """Returns the MIME type of the image.""" + if PIL_Image: + return PIL_Image.MIME.get(self._pil_image.format, "image/jpeg") + # Fall back to jpeg + return "image/jpeg" + + def show(self): + """Shows the image. + + This method only works when in a notebook environment. + """ + if PIL_Image and IPython_display: + IPython_display.display(self._pil_image) + + def save(self, location: str): + """Saves image to a file. + + Args: + location: Local path where to save the image. + """ + pathlib.Path(location).write_bytes(self._image_bytes) + + def _as_base64_string(self) -> str: + """Encodes image using the base64 encoding. + + Returns: + Base64 encoding of the image as a string. + """ + # ! b64encode returns `bytes` object, not `str`. + # We need to convert `bytes` to `str`, otherwise we get service error: + # "received initial metadata size exceeds limit" + return base64.b64encode(self._image_bytes).decode("ascii") + + def _repr_png_(self): + return self._pil_image._repr_png_() # type:ignore + + +class ImageGenerationModel: + """Generates images from text prompt. + + Examples:: + + model = ImageGenerationModel.from_pretrained("imagegeneration@002") + response = model.generate_images( + prompt="Astronaut riding a horse", + # Optional: + number_of_images=1, + ) + response[0].show() + response[0].save("image1.png") + """ + + def __init__(self, model_id: str): + if not model_id.startswith("models"): + model_id = f"models/{model_id}" + self.model_name = model_id + self._client = None + + @classmethod + def from_pretrained(cls, model_name: str): + """For vertex compatibility""" + return cls(model_name) + + def _generate_images( + self, + prompt: str, + *, + negative_prompt: Optional[str] = None, + number_of_images: int = 1, + width: Optional[int] = None, + height: Optional[int] = None, + aspect_ratio: Optional[AspectRatio] = None, + guidance_scale: Optional[float] = None, + output_mime_type: Optional[OutputMimeType] = None, + compression_quality: Optional[float] = None, + language: Optional[str] = None, + safety_filter_level: Optional[SafetyFilterLevel] = None, + person_generation: Optional[PersonGeneration] = None, + ) -> "ImageGenerationResponse": + """Generates images from text prompt. + + Args: + prompt: Text prompt for the image. + negative_prompt: A description of what you want to omit in the generated + images. + number_of_images: Number of images to generate. Range: 1..8. + width: Width of the image. One of the sizes must be 256 or 1024. + height: Height of the image. One of the sizes must be 256 or 1024. + aspect_ratio: Aspect ratio for the image. Supported values are: + * 1:1 - Square image + * 9:16 - Portait image + * 16:9 - Landscape image + * 4:3 - Landscape, desktop ratio. + * 3:4 - Portrait, desktop ratio + guidance_scale: Controls the strength of the prompt. Suggested values + are - * 0-9 (low strength) * 10-20 (medium strength) * 21+ (high + strength) + output_mime_type: Which image format should the output be saved as. + Supported values: * image/png: Save as a PNG image * image/jpeg: Save + as a JPEG image + compression_quality: Level of compression if the output mime type is + selected to be image/jpeg. Float between 0 to 100 + language: Language of the text prompt for the image. Default: None. + Supported values are `"en"` for English, `"hi"` for Hindi, `"ja"` for + Japanese, `"ko"` for Korean, and `"auto"` for automatic language + detection. + safety_filter_level: Adds a filter level to Safety filtering. Supported + values are: * "block_most" : Strongest filtering level, most strict + blocking * "block_some" : Block some problematic prompts and responses + * "block_few" : Block fewer problematic prompts and responses * + "block_fewest" : Block very few problematic prompts and responses + person_generation: Allow generation of people by the model Supported + values are: * "dont_allow" : Block generation of people * + "allow_adult" : Generate adults, but not children * "allow_all" : + Generate adults and children + + Returns: + An `ImageGenerationResponse` object. + """ + if self._client is None: + self._client = client.get_default_prediction_client() + # Note: Only a single prompt is supported by the service. + instance = {"prompt": prompt} + shared_generation_parameters = { + "prompt": prompt, + # b/295946075 The service stopped supporting image sizes. + # "width": width, + # "height": height, + "number_of_images_in_batch": number_of_images, + } + + parameters = {} + max_size = max(width or 0, height or 0) or None + if aspect_ratio is not None: + if aspect_ratio not in ASPECT_RATIOS: + raise ValueError(f"aspect_ratio not in {ASPECT_RATIOS}") + parameters["aspectRatio"] = aspect_ratio + elif max_size: + # Note: The size needs to be a string + parameters["sampleImageSize"] = str(max_size) + if height is not None and width is not None and height != width: + parameters["aspectRatio"] = f"{width}:{height}" + + parameters["sampleCount"] = number_of_images + if negative_prompt: + parameters["negativePrompt"] = negative_prompt + shared_generation_parameters["negative_prompt"] = negative_prompt + + if guidance_scale is not None: + parameters["guidanceScale"] = guidance_scale + shared_generation_parameters["guidance_scale"] = guidance_scale + + if language is not None: + parameters["language"] = language + shared_generation_parameters["language"] = language + + parameters["outputOptions"] = {} + if output_mime_type is not None: + if output_mime_type not in OUTPUT_MIME_TYPES: + raise ValueError(f"output_mime_type not in {OUTPUT_MIME_TYPES}") + parameters["outputOptions"]["mimeType"] = output_mime_type + shared_generation_parameters["mime_type"] = output_mime_type + + if compression_quality is not None: + parameters["outputOptions"]["compressionQuality"] = compression_quality + shared_generation_parameters["compression_quality"] = compression_quality + + if safety_filter_level is not None: + if safety_filter_level not in SAFETY_FILTER_LEVELS: + raise ValueError(f"safety_filter_level not in {SAFETY_FILTER_LEVELS}") + parameters["safetySetting"] = safety_filter_level + shared_generation_parameters["safety_filter_level"] = safety_filter_level + + if person_generation is not None: + parameters["personGeneration"] = person_generation + shared_generation_parameters["person_generation"] = person_generation + + # This is to get around https://github.com/googleapis/proto-plus-python/issues/488 + pr = protos.PredictRequest.pb() + request = pr( + model=self.model_name, instances=[to_value(instance)], parameters=to_value(parameters) + ) + + response = self._client.predict(request) + + generated_images: List["GeneratedImage"] = [] + for idx, prediction in enumerate(response.predictions): + generation_parameters = dict(shared_generation_parameters) + generation_parameters["index_of_image_in_batch"] = idx + encoded_bytes = prediction.get("bytesBase64Encoded") + generated_image = GeneratedImage( + image_bytes=base64.b64decode(encoded_bytes) if encoded_bytes else None, + generation_parameters=generation_parameters, + ) + generated_images.append(generated_image) + + return ImageGenerationResponse(images=generated_images) + + def generate_images( + self, + prompt: str, + *, + negative_prompt: Optional[str] = None, + number_of_images: int = 1, + aspect_ratio: Optional[AspectRatio] = None, + guidance_scale: Optional[float] = None, + language: Optional[str] = None, + safety_filter_level: Optional[SafetyFilterLevel] = None, + person_generation: Optional[PersonGeneration] = None, + ) -> "ImageGenerationResponse": + """Generates images from text prompt. + + Args: + prompt: Text prompt for the image. + negative_prompt: A description of what you want to omit in the generated + images. + number_of_images: Number of images to generate. Range: 1..8. + aspect_ratio: Changes the aspect ratio of the generated image Supported + values are: + * "1:1" : 1:1 aspect ratio + * "9:16" : 9:16 aspect ratio + * "16:9" : 16:9 aspect ratio + * "4:3" : 4:3 aspect ratio + * "3:4" : 3:4 aspect_ratio + guidance_scale: Controls the strength of the prompt. Suggested values are: + * 0-9 (low strength) + * 10-20 (medium strength) + * 21+ (high strength) + language: Language of the text prompt for the image. Default: None. + Supported values are `"en"` for English, `"hi"` for Hindi, `"ja"` + for Japanese, `"ko"` for Korean, and `"auto"` for automatic language + detection. + safety_filter_level: Adds a filter level to Safety filtering. Supported + values are: + * "block_most" : Strongest filtering level, most strict + blocking + * "block_some" : Block some problematic prompts and responses + * "block_few" : Block fewer problematic prompts and responses + * "block_fewest" : Block very few problematic prompts and responses + person_generation: Allow generation of people by the model Supported + values are: + * "dont_allow" : Block generation of people + * "allow_adult" : Generate adults, but not children + * "allow_all" : Generate adults and children + Returns: + An `ImageGenerationResponse` object. + """ + return self._generate_images( + prompt=prompt, + negative_prompt=negative_prompt, + number_of_images=number_of_images, + aspect_ratio=aspect_ratio, + guidance_scale=guidance_scale, + language=language, + safety_filter_level=safety_filter_level, + person_generation=person_generation, + ) + + +@dataclasses.dataclass +class ImageGenerationResponse: + """Image generation response. + + Attributes: + images: The list of generated images. + """ + + __module__ = "vertexai.preview.vision_models" + + images: List["GeneratedImage"] + + def __iter__(self) -> typing.Iterator["GeneratedImage"]: + """Iterates through the generated images.""" + yield from self.images + + def __getitem__(self, idx: int) -> "GeneratedImage": + """Gets the generated image by index.""" + return self.images[idx] + + +_EXIF_USER_COMMENT_TAG_IDX = 0x9286 +_IMAGE_GENERATION_PARAMETERS_EXIF_KEY = ( + "google.cloud.vertexai.image_generation.image_generation_parameters" +) + + +class GeneratedImage(Image): + """Generated image.""" + + __module__ = "google.generativeai" + + def __init__( + self, + image_bytes: Optional[bytes], + generation_parameters: Dict[str, Any], + ): + """Creates a `GeneratedImage` object. + + Args: + image_bytes: Image file bytes. Image can be in PNG or JPEG format. + generation_parameters: Image generation parameter values. + """ + super().__init__(image_bytes=image_bytes) + self._generation_parameters = generation_parameters + + @property + def generation_parameters(self): + """Image generation parameters as a dictionary.""" + return self._generation_parameters + + @staticmethod + def load_from_file(location: str) -> "GeneratedImage": + """Loads image from file. + + Args: + location: Local path from where to load the image. + + Returns: + Loaded image as a `GeneratedImage` object. + """ + base_image = Image.load_from_file(location=location) + exif = base_image._pil_image.getexif() # pylint: disable=protected-access + exif_comment_dict = json.loads(exif[_EXIF_USER_COMMENT_TAG_IDX]) + generation_parameters = exif_comment_dict[_IMAGE_GENERATION_PARAMETERS_EXIF_KEY] + return GeneratedImage( + image_bytes=base_image._image_bytes, # pylint: disable=protected-access + generation_parameters=generation_parameters, + ) + + def save(self, location: str, include_generation_parameters: bool = True): + """Saves image to a file. + + Args: + location: Local path where to save the image. + include_generation_parameters: Whether to include the image + generation parameters in the image's EXIF metadata. + """ + if include_generation_parameters: + if not self._generation_parameters: + raise ValueError("Image does not have generation parameters.") + if not PIL_Image: + raise ValueError("The PIL module is required for saving generation parameters.") + + exif = self._pil_image.getexif() + exif[_EXIF_USER_COMMENT_TAG_IDX] = json.dumps( + {_IMAGE_GENERATION_PARAMETERS_EXIF_KEY: self._generation_parameters} + ) + self._pil_image.save(location, exif=exif) + else: + super().save(location=location) diff --git a/samples/controlled_generation.py b/samples/controlled_generation.py index 042209a72..78c422464 100644 --- a/samples/controlled_generation.py +++ b/samples/controlled_generation.py @@ -139,7 +139,7 @@ class Choice(enum.Enum): response_mime_type="text/x.enum", response_schema=Choice ), ) - print(result) # Keyboard + print(result) # "Keyboard" # [END x_enum] def test_x_enum_raw(self): @@ -157,7 +157,7 @@ def test_x_enum_raw(self): }, ), ) - print(result) # Keyboard + print(result) # "Keyboard" # [END x_enum_raw] diff --git a/tests/test_async_code_match.py b/tests/test_async_code_match.py index 0ec4550d4..457200b7b 100644 --- a/tests/test_async_code_match.py +++ b/tests/test_async_code_match.py @@ -75,6 +75,7 @@ def _execute_code_match(self, source, asource): asource = re.sub(" *?# type: ignore", "", asource) self.assertEqual(source, asource) + @absltest.skip("This test is broken: globally matching functions based only on the name") def test_code_match_for_async_methods(self): for fpath in (pathlib.Path(__file__).parent.parent / "google").rglob("*.py"): if fpath.name in EXEMPT_FILES or any([d in fpath.parts for d in EXEMPT_DIRS]): From 0da9b2d5dbdde28581b435d33c4b78b225544ba9 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 2 Oct 2024 11:13:52 -0700 Subject: [PATCH 2/6] Remove block_fewest (#581) * Remove block_fewest * Format. Change-Id: Ib65044811345a97078723bb74092913007f517e6 * Remove allow_all --- .../vision_models/_vision_models.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py index 0bb4f7dbe..2db174d64 100644 --- a/google/generativeai/vision_models/_vision_models.py +++ b/google/generativeai/vision_models/_vision_models.py @@ -104,10 +104,10 @@ def to_mapping_value(value) -> struct_pb2.Struct: OutputMimeType = Literal["image/png", "image/jpeg"] OUTPUT_MIME_TYPES = OutputMimeType.__args__ # type: ignore -SafetyFilterLevel = Literal["block_most", "block_some", "block_few", "block_fewest"] +SafetyFilterLevel = Literal["block_most", "block_some", "block_few"] SAFETY_FILTER_LEVELS = SafetyFilterLevel.__args__ # type: ignore -PersonGeneration = Literal["dont_allow", "allow_adult", "allow_all"] +PersonGeneration = Literal["dont_allow", "allow_adult"] PERSON_GENERATIONS = PersonGeneration.__args__ # type: ignore @@ -277,14 +277,14 @@ def _generate_images( Japanese, `"ko"` for Korean, and `"auto"` for automatic language detection. safety_filter_level: Adds a filter level to Safety filtering. Supported - values are: * "block_most" : Strongest filtering level, most strict - blocking * "block_some" : Block some problematic prompts and responses - * "block_few" : Block fewer problematic prompts and responses * - "block_fewest" : Block very few problematic prompts and responses + values are: + * "block_most" : Strongest filtering level, most strict blocking + * "block_some" : Block some problematic prompts and responses + * "block_few" : Block fewer problematic prompts and responses person_generation: Allow generation of people by the model Supported - values are: * "dont_allow" : Block generation of people * - "allow_adult" : Generate adults, but not children * "allow_all" : - Generate adults and children + values are: + * "dont_allow" : Block generation of people + * "allow_adult" : Generate adults, but not children Returns: An `ImageGenerationResponse` object. @@ -408,12 +408,10 @@ def generate_images( blocking * "block_some" : Block some problematic prompts and responses * "block_few" : Block fewer problematic prompts and responses - * "block_fewest" : Block very few problematic prompts and responses person_generation: Allow generation of people by the model Supported values are: * "dont_allow" : Block generation of people * "allow_adult" : Generate adults, but not children - * "allow_all" : Generate adults and children Returns: An `ImageGenerationResponse` object. """ From e09b902274ebf405c55f0b3bd5b6affd61e5de02 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 7 Oct 2024 14:36:34 -0700 Subject: [PATCH 3/6] Update safety filter levels Change-Id: Idf5990bf6ebf33da07318c0348a42bb7efd04582 --- google/generativeai/vision_models/_vision_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py index 2db174d64..3e7dbfa22 100644 --- a/google/generativeai/vision_models/_vision_models.py +++ b/google/generativeai/vision_models/_vision_models.py @@ -104,7 +104,7 @@ def to_mapping_value(value) -> struct_pb2.Struct: OutputMimeType = Literal["image/png", "image/jpeg"] OUTPUT_MIME_TYPES = OutputMimeType.__args__ # type: ignore -SafetyFilterLevel = Literal["block_most", "block_some", "block_few"] +SafetyFilterLevel = Literal["block_low_and_above", "block_medium_and_above", "block_only_high"] SAFETY_FILTER_LEVELS = SafetyFilterLevel.__args__ # type: ignore PersonGeneration = Literal["dont_allow", "allow_adult"] From aae0cafba0a0e8a305b2f4011c2922df20dc3dac Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 29 Oct 2024 11:14:10 -0700 Subject: [PATCH 4/6] watermark (#603) * watermark Change-Id: I271993e07bfc034d89cd74013339046a21d0b472 * fix typing Change-Id: Ib026f3b66fb44c6ad35a15030b03204306b35443 * format Change-Id: I24304a80e6689b4fcec3155f7274e24db712b402 * fix test Change-Id: I68cb74f8ee5f224942417ea4e5fc4232ec688977 * make check_watermark a stand alone function Change-Id: I2d72620359dcc70fe8e720a14f78d83f75a42d90 * simplify typing. Change-Id: I1b901cc40b4b029cb09699fc6eac77690622b6e8 * Update google/generativeai/vision_models/_vision_models.py * Typo Co-authored-by: Mark McDonald --------- Co-authored-by: Mark McDonald --- google/generativeai/__init__.py | 1 - google/generativeai/types/content_types.py | 5 +- google/generativeai/vision_models/__init__.py | 1 + .../vision_models/_vision_models.py | 76 ++++++++++++++++++- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/google/generativeai/__init__.py b/google/generativeai/__init__.py index 73025a1b4..66af17641 100644 --- a/google/generativeai/__init__.py +++ b/google/generativeai/__init__.py @@ -79,7 +79,6 @@ __version__ = version.__version__ -del embedding del files del generative_models del models diff --git a/google/generativeai/types/content_types.py b/google/generativeai/types/content_types.py index f3db610e1..23241a536 100644 --- a/google/generativeai/types/content_types.py +++ b/google/generativeai/types/content_types.py @@ -35,6 +35,7 @@ import IPython.display IMAGE_TYPES = (PIL.Image.Image, IPython.display.Image) + ImageType = PIL.Image.Image | IPython.display.Image else: IMAGE_TYPES = () try: @@ -52,6 +53,8 @@ except ImportError: IPython = None + ImageType = Union["PIL.Image.Image", "IPython.display.Image"] + __all__ = [ "BlobDict", @@ -123,7 +126,7 @@ def webp_blob(image: PIL.Image.Image) -> protos.Blob: return file_blob(image) or webp_blob(image) -def image_to_blob(image) -> protos.Blob: +def image_to_blob(image: ImageType) -> protos.Blob: if PIL is not None: if isinstance(image, PIL.Image.Image): return _pil_to_blob(image) diff --git a/google/generativeai/vision_models/__init__.py b/google/generativeai/vision_models/__init__.py index 65a545831..2a4a27e32 100644 --- a/google/generativeai/vision_models/__init__.py +++ b/google/generativeai/vision_models/__init__.py @@ -15,6 +15,7 @@ """Classes for working with vision models.""" from google.generativeai.vision_models._vision_models import ( + check_watermark, Image, GeneratedImage, ImageGenerationModel, diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py index 3e7dbfa22..52ec689a9 100644 --- a/google/generativeai/vision_models/_vision_models.py +++ b/google/generativeai/vision_models/_vision_models.py @@ -18,15 +18,16 @@ import base64 import collections import dataclasses -import hashlib import io import json +import os import pathlib import typing from typing import Any, Dict, List, Literal, Optional, Union from google.generativeai import client from google.generativeai import protos +from google.generativeai.types import content_types from google.protobuf import struct_pb2 @@ -110,6 +111,52 @@ def to_mapping_value(value) -> struct_pb2.Struct: PersonGeneration = Literal["dont_allow", "allow_adult"] PERSON_GENERATIONS = PersonGeneration.__args__ # type: ignore +ImageLikeType = Union["Image", pathlib.Path, content_types.ImageType] + + +def check_watermark( + img: ImageLikeType, model_id: str = "models/image-verification-001" +) -> "CheckWatermarkResult": + """Checks if an image has a Google-AI watermark. + + Args: + img: can be a `pathlib.Path` or a `PIL.Image.Image`, `IPython.display.Image`, or `google.generativeai.Image`. + model_id: Which version of the image-verification model to send the image to. + + Returns: + + """ + if isinstance(img, Image): + pass + elif isinstance(img, pathlib.Path): + img = Image.load_from_file(img) + elif IPython_display is not None and isinstance(img, IPython_display.Image): + img = Image(image_bytes=img.data) + elif PIL_Image is not None and isinstance(img, PIL_Image.Image): + blob = content_types._pil_to_blob(img) + img = Image(image_bytes=blob.data) + elif isinstance(img, protos.Blob): + img = Image(image_bytes=img.data) + else: + raise TypeError( + f"Not implemented: Could not convert a {type(img)} into `Image`\n {img=}" + ) + + prediction_client = client.get_default_prediction_client() + if not model_id.startswith("models/"): + model_id = f"models/{model_id}" + + instance = {"image": {"bytesBase64Encoded": base64.b64encode(img._loaded_bytes).decode()}} + parameters = {"watermarkVerification": True} + + # This is to get around https://github.com/googleapis/proto-plus-python/issues/488 + pr = protos.PredictRequest.pb() + request = pr(model=model_id, instances=[to_value(instance)], parameters=to_value(parameters)) + + response = prediction_client.predict(request) + + return CheckWatermarkResult(response.predictions) + class Image: """Image.""" @@ -131,7 +178,7 @@ def __init__( self._image_bytes = image_bytes @staticmethod - def load_from_file(location: str) -> "Image": + def load_from_file(location: os.PathLike) -> "Image": """Loads image from local file or Google Cloud Storage. Args: @@ -206,6 +253,29 @@ def _as_base64_string(self) -> str: def _repr_png_(self): return self._pil_image._repr_png_() # type:ignore + check_watermark = check_watermark + + +class CheckWatermarkResult: + def __init__(self, predictions): + self._predictions = predictions + + @property + def decision(self): + return self._predictions[0]["decision"] + + def __str__(self): + return f"CheckWatermarkResult([{{'decision': {self.decision!r}}}])" + + def __bool__(self): + decision = self.decision + if decision == "ACCEPT": + return True + elif decision == "REJECT": + return False + else: + raise ValueError(f"Unrecognized result: {decision}") + class ImageGenerationModel: """Generates images from text prompt. @@ -479,7 +549,7 @@ def generation_parameters(self): return self._generation_parameters @staticmethod - def load_from_file(location: str) -> "GeneratedImage": + def load_from_file(location: os.PathLike) -> "GeneratedImage": """Loads image from file. Args: From c72aa6b3c417ad4f91c862de1482b0847b190e6e Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 1 Nov 2024 12:13:46 -0700 Subject: [PATCH 5/6] Image feedback (#608) * Allow the use of generated images as inputs Change-Id: I0956fb78272a8a8af2c5219d80a26dec944040a8 # Conflicts: # google/generativeai/vision_models/_vision_models.py * types + formatting Change-Id: I0cac4ba1de764d3c02c5eab7556d8324aeda1f93 * add files Change-Id: Ie7f91cef171c1f813b52ff1b2a4daedf7ea19edd * Fix 3.9 Change-Id: If9ff9ebc0b2bf16b91e741d862a9e2808c7a738a * Fix 3.9 Change-Id: Iee02352ca21fa66da9b097d4dfa9454b67609e79 * fix pytype Change-Id: Ic5c250f3f3ded2374abfbdbee6d62ea4cfb0f799 * fix pytype Change-Id: I431c66e45e7582218b5de7a90eeeee01b80df664 * typo Change-Id: I1bb15e1363c652f9c0b4a60dad834fce65a4f0a1 * reapply commits lost in merge Change-Id: I7bfebdeaa217d93ed5d11aca31cf0b20afd38c02 * Update google/generativeai/client.py * Remove GCS reference Change-Id: I5c1b8cbccee0e13d8aca70582a76e0c089e040ed * black . Change-Id: I2c24f8798cb8103d35474e7e6d2e4fc3100825aa --- google/generativeai/client.py | 95 ++++- google/generativeai/types/content_types.py | 98 +---- .../types/image_types/__init__.py | 1 + .../types/image_types/_image_types.py | 332 +++++++++++++++++ google/generativeai/vision_models/__init__.py | 6 +- .../vision_models/_vision_models.py | 335 +----------------- tests/test_content.py | 13 +- 7 files changed, 443 insertions(+), 437 deletions(-) create mode 100644 google/generativeai/types/image_types/__init__.py create mode 100644 google/generativeai/types/image_types/_image_types.py diff --git a/google/generativeai/client.py b/google/generativeai/client.py index a75643f1a..f53007be3 100644 --- a/google/generativeai/client.py +++ b/google/generativeai/client.py @@ -3,10 +3,11 @@ import os import contextlib import inspect +import collections import dataclasses import pathlib from typing import Any, cast -from collections.abc import Sequence +from collections.abc import Sequence, Mapping import httplib2 from io import IOBase @@ -23,6 +24,11 @@ import googleapiclient.http import googleapiclient.discovery +from google.protobuf import struct_pb2 + +from proto.marshal.collections import maps +from proto.marshal.collections import repeated + try: from google.generativeai import version @@ -130,6 +136,70 @@ async def create_file(self, *args, **kwargs): ) +# This is to get around https://github.com/googleapis/proto-plus-python/issues/488 +def to_value(value) -> struct_pb2.Value: + """Return a protobuf Value object representing this value.""" + if isinstance(value, struct_pb2.Value): + return value + if value is None: + return struct_pb2.Value(null_value=0) + if isinstance(value, bool): + return struct_pb2.Value(bool_value=value) + if isinstance(value, (int, float)): + return struct_pb2.Value(number_value=float(value)) + if isinstance(value, str): + return struct_pb2.Value(string_value=value) + if isinstance(value, collections.abc.Sequence): + return struct_pb2.Value(list_value=to_list_value(value)) + if isinstance(value, collections.abc.Mapping): + return struct_pb2.Value(struct_value=to_mapping_value(value)) + raise ValueError("Unable to coerce value: %r" % value) + + +def to_list_value(value) -> struct_pb2.ListValue: + # We got a proto, or else something we sent originally. + # Preserve the instance we have. + if isinstance(value, struct_pb2.ListValue): + return value + if isinstance(value, repeated.RepeatedComposite): + return struct_pb2.ListValue(values=[v for v in value.pb]) + + # We got a list (or something list-like); convert it. + return struct_pb2.ListValue(values=[to_value(v) for v in value]) + + +def to_mapping_value(value) -> struct_pb2.Struct: + # We got a proto, or else something we sent originally. + # Preserve the instance we have. + if isinstance(value, struct_pb2.Struct): + return value + if isinstance(value, maps.MapComposite): + return struct_pb2.Struct( + fields={k: v for k, v in value.pb.items()}, + ) + + # We got a dict (or something dict-like); convert it. + return struct_pb2.Struct(fields={k: to_value(v) for k, v in value.items()}) + + +class PredictionServiceClient(glm.PredictionServiceClient): + def predict(self, model=None, instances=None, parameters=None): + pr = protos.PredictRequest.pb() + request = pr( + model=model, instances=[to_value(i) for i in instances], parameters=to_value(parameters) + ) + return super().predict(request) + + +class PredictionServiceAsyncClient(glm.PredictionServiceAsyncClient): + async def predict(self, model=None, instances=None, parameters=None): + pr = protos.PredictRequest.pb() + request = pr( + model=model, instances=[to_value(i) for i in instances], parameters=to_value(parameters) + ) + return await super().predict(request) + + @dataclasses.dataclass class _ClientManager: client_config: dict[str, Any] = dataclasses.field(default_factory=dict) @@ -220,15 +290,20 @@ def configure( self.clients = {} def make_client(self, name): - if name == "file": - cls = FileServiceClient - elif name == "file_async": - cls = FileServiceAsyncClient - elif name.endswith("_async"): - name = name.split("_")[0] - cls = getattr(glm, name.title() + "ServiceAsyncClient") - else: - cls = getattr(glm, name.title() + "ServiceClient") + local_clients = { + "file": FileServiceClient, + "file_async": FileServiceAsyncClient, + "prediction": PredictionServiceClient, + "prediction_async": PredictionServiceAsyncClient, + } + cls = local_clients.get(name, None) + + if cls is None: + if name.endswith("_async"): + name = name.split("_")[0] + cls = getattr(glm, name.title() + "ServiceAsyncClient") + else: + cls = getattr(glm, name.title() + "ServiceClient") # Attempt to configure using defaults. if not self.client_config: diff --git a/google/generativeai/types/content_types.py b/google/generativeai/types/content_types.py index 23241a536..3eeababbb 100644 --- a/google/generativeai/types/content_types.py +++ b/google/generativeai/types/content_types.py @@ -16,45 +16,16 @@ from __future__ import annotations from collections.abc import Iterable, Mapping, Sequence -import io import inspect -import mimetypes -import pathlib -import typing from typing import Any, Callable, Union from typing_extensions import TypedDict import pydantic from google.generativeai.types import file_types +from google.generativeai.types.image_types import _image_types from google.generativeai import protos -if typing.TYPE_CHECKING: - import PIL.Image - import PIL.ImageFile - import IPython.display - - IMAGE_TYPES = (PIL.Image.Image, IPython.display.Image) - ImageType = PIL.Image.Image | IPython.display.Image -else: - IMAGE_TYPES = () - try: - import PIL.Image - import PIL.ImageFile - - IMAGE_TYPES = IMAGE_TYPES + (PIL.Image.Image,) - except ImportError: - PIL = None - - try: - import IPython.display - - IMAGE_TYPES = IMAGE_TYPES + (IPython.display.Image,) - except ImportError: - IPython = None - - ImageType = Union["PIL.Image.Image", "IPython.display.Image"] - __all__ = [ "BlobDict", @@ -97,62 +68,6 @@ def to_mode(x: ModeOptions) -> Mode: return _MODE[x] -def _pil_to_blob(image: PIL.Image.Image) -> protos.Blob: - # If the image is a local file, return a file-based blob without any modification. - # Otherwise, return a lossless WebP blob (same quality with optimized size). - def file_blob(image: PIL.Image.Image) -> protos.Blob | None: - if not isinstance(image, PIL.ImageFile.ImageFile) or image.filename is None: - return None - filename = str(image.filename) - if not pathlib.Path(filename).is_file(): - return None - - mime_type = image.get_format_mimetype() - image_bytes = pathlib.Path(filename).read_bytes() - - return protos.Blob(mime_type=mime_type, data=image_bytes) - - def webp_blob(image: PIL.Image.Image) -> protos.Blob: - # Reference: https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp - image_io = io.BytesIO() - image.save(image_io, format="webp", lossless=True) - image_io.seek(0) - - mime_type = "image/webp" - image_bytes = image_io.read() - - return protos.Blob(mime_type=mime_type, data=image_bytes) - - return file_blob(image) or webp_blob(image) - - -def image_to_blob(image: ImageType) -> protos.Blob: - if PIL is not None: - if isinstance(image, PIL.Image.Image): - return _pil_to_blob(image) - - if IPython is not None: - if isinstance(image, IPython.display.Image): - name = image.filename - if name is None: - raise ValueError( - "Conversion failed. The `IPython.display.Image` can only be converted if " - "it is constructed from a local file. Please ensure you are using the format: Image(filename='...')." - ) - mime_type, _ = mimetypes.guess_type(name) - if mime_type is None: - mime_type = "image/unknown" - - return protos.Blob(mime_type=mime_type, data=image.data) - - raise TypeError( - "Image conversion failed. The input was expected to be of type `Image` " - "(either `PIL.Image.Image` or `IPython.display.Image`).\n" - f"However, received an object of type: {type(image)}.\n" - f"Object Value: {image}" - ) - - class BlobDict(TypedDict): mime_type: str data: bytes @@ -189,12 +104,7 @@ def is_blob_dict(d): return "mime_type" in d and "data" in d -if typing.TYPE_CHECKING: - BlobType = Union[ - protos.Blob, BlobDict, PIL.Image.Image, IPython.display.Image - ] # Any for the images -else: - BlobType = Union[protos.Blob, BlobDict, Any] +BlobType = Union[protos.Blob, BlobDict, _image_types.ImageType] # Any for the images def to_blob(blob: BlobType) -> protos.Blob: @@ -203,8 +113,8 @@ def to_blob(blob: BlobType) -> protos.Blob: if isinstance(blob, protos.Blob): return blob - elif isinstance(blob, IMAGE_TYPES): - return image_to_blob(blob) + elif isinstance(blob, _image_types.IMAGE_TYPES): + return _image_types.image_to_blob(blob) else: if isinstance(blob, Mapping): raise KeyError( diff --git a/google/generativeai/types/image_types/__init__.py b/google/generativeai/types/image_types/__init__.py new file mode 100644 index 000000000..6e9d0a3fe --- /dev/null +++ b/google/generativeai/types/image_types/__init__.py @@ -0,0 +1 @@ +from google.generativeai.types.image_types._image_types import * diff --git a/google/generativeai/types/image_types/_image_types.py b/google/generativeai/types/image_types/_image_types.py new file mode 100644 index 000000000..ddfea057f --- /dev/null +++ b/google/generativeai/types/image_types/_image_types.py @@ -0,0 +1,332 @@ +import base64 +import io +import json +import mimetypes +import os +import pathlib +import typing +from typing import Any, Dict, Optional, Union + +from google.generativeai import protos +from google.generativeai import client + +# pylint: disable=g-import-not-at-top +if typing.TYPE_CHECKING: + import PIL.Image + import PIL.ImageFile + import IPython.display + + IMAGE_TYPES = (PIL.Image.Image, IPython.display.Image) + ImageType = PIL.Image.Image | IPython.display.Image +else: + IMAGE_TYPES = () + try: + import PIL.Image + import PIL.ImageFile + + IMAGE_TYPES = IMAGE_TYPES + (PIL.Image.Image,) + except ImportError: + PIL = None + + try: + import IPython.display + + IMAGE_TYPES = IMAGE_TYPES + (IPython.display.Image,) + except ImportError: + IPython = None + + ImageType = Union["Image", "PIL.Image.Image", "IPython.display.Image"] +# pylint: enable=g-import-not-at-top + +__all__ = ["Image", "GeneratedImage", "check_watermark", "CheckWatermarkResult", "ImageType"] + + +def _pil_to_blob(image: PIL.Image.Image) -> protos.Blob: + # If the image is a local file, return a file-based blob without any modification. + # Otherwise, return a lossless WebP blob (same quality with optimized size). + def file_blob(image: PIL.Image.Image) -> Union[protos.Blob, None]: + if not isinstance(image, PIL.ImageFile.ImageFile) or image.filename is None: + return None + filename = str(image.filename) + if not pathlib.Path(filename).is_file(): + return None + + mime_type = image.get_format_mimetype() + image_bytes = pathlib.Path(filename).read_bytes() + + return protos.Blob(mime_type=mime_type, data=image_bytes) + + def webp_blob(image: PIL.Image.Image) -> protos.Blob: + # Reference: https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp + image_io = io.BytesIO() + image.save(image_io, format="webp", lossless=True) + image_io.seek(0) + + mime_type = "image/webp" + image_bytes = image_io.read() + + return protos.Blob(mime_type=mime_type, data=image_bytes) + + return file_blob(image) or webp_blob(image) + + +def image_to_blob(image: ImageType) -> protos.Blob: + if PIL is not None: + if isinstance(image, PIL.Image.Image): + return _pil_to_blob(image) + + if IPython is not None: + if isinstance(image, IPython.display.Image): + name = image.filename + if name is None: + raise ValueError( + "Conversion failed. The `IPython.display.Image` can only be converted if " + "it is constructed from a local file. Please ensure you are using the format: Image(filename='...')." + ) + mime_type, _ = mimetypes.guess_type(name) + if mime_type is None: + mime_type = "image/unknown" + + return protos.Blob(mime_type=mime_type, data=image.data) + + if isinstance(image, Image): + return protos.Blob(mime_type=image._mime_type, data=image._image_bytes) + + raise TypeError( + "Image conversion failed. The input was expected to be of type `Image` " + "(either `PIL.Image.Image` or `IPython.display.Image`).\n" + f"However, received an object of type: {type(image)}.\n" + f"Object Value: {image}" + ) + + +class CheckWatermarkResult: + def __init__(self, predictions): + self._predictions = predictions + + @property + def decision(self): + return self._predictions[0]["decision"] + + def __str__(self): + return f"CheckWatermarkResult([{{'decision': {self.decision!r}}}])" + + def __bool__(self): + decision = self.decision + if decision == "ACCEPT": + return True + elif decision == "REJECT": + return False + else: + raise ValueError(f"Unrecognized result: {decision}") + + +def check_watermark( + img: Union[pathlib.Path, ImageType], model_id: str = "models/image-verification-001" +) -> "CheckWatermarkResult": + """Checks if an image has a Google-AI watermark. + + Args: + img: can be a `pathlib.Path` or a `PIL.Image.Image`, `IPython.display.Image`, or `google.generativeai.Image`. + model_id: Which version of the image-verification model to send the image to. + + Returns: + + """ + if isinstance(img, Image): + pass + elif isinstance(img, pathlib.Path): + img = Image.load_from_file(img) + elif IPython.display is not None and isinstance(img, IPython.display.Image): + img = Image(image_bytes=img.data) + elif PIL.Image is not None and isinstance(img, PIL.Image.Image): + blob = _pil_to_blob(img) + img = Image(image_bytes=blob.data) + elif isinstance(img, protos.Blob): + img = Image(image_bytes=img.data) + else: + raise TypeError( + f"Not implemented: Could not convert a {type(img)} into `Image`\n {img=}" + ) + + prediction_client = client.get_default_prediction_client() + if not model_id.startswith("models/"): + model_id = f"models/{model_id}" + + instance = {"image": {"bytesBase64Encoded": base64.b64encode(img._loaded_bytes).decode()}} + parameters = {"watermarkVerification": True} + + response = prediction_client.predict( + model=model_id, instances=[instance], parameters=parameters + ) + + return CheckWatermarkResult(response.predictions) + + +class Image: + """Image.""" + + __module__ = "vertexai.vision_models" + + _loaded_bytes: Optional[bytes] = None + _loaded_image: Optional["PIL.Image.Image"] = None + + def __init__( + self, + image_bytes: Optional[bytes], + ): + """Creates an `Image` object. + + Args: + image_bytes: Image file bytes. Image can be in PNG or JPEG format. + """ + self._image_bytes = image_bytes + + @staticmethod + def load_from_file(location: os.PathLike) -> "Image": + """Loads image from local file. + + Args: + location: Local path from where to load + the image. + + Returns: + Loaded image as an `Image` object. + """ + # Load image from local path + image_bytes = pathlib.Path(location).read_bytes() + image = Image(image_bytes=image_bytes) + return image + + @property + def _image_bytes(self) -> bytes: + return self._loaded_bytes + + @_image_bytes.setter + def _image_bytes(self, value: bytes): + self._loaded_bytes = value + + @property + def _pil_image(self) -> "PIL.Image.Image": # type: ignore + if self._loaded_image is None: + if not PIL: + raise RuntimeError( + "The PIL module is not available. Please install the Pillow package." + ) + self._loaded_image = PIL.Image.open(io.BytesIO(self._image_bytes)) + return self._loaded_image + + @property + def _size(self): + return self._pil_image.size + + @property + def _mime_type(self) -> str: + """Returns the MIME type of the image.""" + import PIL + + return PIL.Image.MIME.get(self._pil_image.format, "image/jpeg") + + def show(self): + """Shows the image. + + This method only works when in a notebook environment. + """ + if PIL and IPython: + IPython.display.display(self._pil_image) + + def save(self, location: str): + """Saves image to a file. + + Args: + location: Local path where to save the image. + """ + pathlib.Path(location).write_bytes(self._image_bytes) + + def _as_base64_string(self) -> str: + """Encodes image using the base64 encoding. + + Returns: + Base64 encoding of the image as a string. + """ + # ! b64encode returns `bytes` object, not `str`. + # We need to convert `bytes` to `str`, otherwise we get service error: + # "received initial metadata size exceeds limit" + return base64.b64encode(self._image_bytes).decode("ascii") + + def _repr_png_(self): + return self._pil_image._repr_png_() # type:ignore + + check_watermark = check_watermark + + +_EXIF_USER_COMMENT_TAG_IDX = 0x9286 +_IMAGE_GENERATION_PARAMETERS_EXIF_KEY = ( + "google.cloud.vertexai.image_generation.image_generation_parameters" +) + + +class GeneratedImage(Image): + """Generated image.""" + + __module__ = "google.generativeai" + + def __init__( + self, + image_bytes: Optional[bytes], + generation_parameters: Dict[str, Any], + ): + """Creates a `GeneratedImage` object. + + Args: + image_bytes: Image file bytes. Image can be in PNG or JPEG format. + generation_parameters: Image generation parameter values. + """ + super().__init__(image_bytes=image_bytes) + self._generation_parameters = generation_parameters + + @property + def generation_parameters(self): + """Image generation parameters as a dictionary.""" + return self._generation_parameters + + @staticmethod + def load_from_file(location: os.PathLike) -> "GeneratedImage": + """Loads image from file. + + Args: + location: Local path from where to load the image. + + Returns: + Loaded image as a `GeneratedImage` object. + """ + base_image = Image.load_from_file(location=location) + exif = base_image._pil_image.getexif() # pylint: disable=protected-access + exif_comment_dict = json.loads(exif[_EXIF_USER_COMMENT_TAG_IDX]) + generation_parameters = exif_comment_dict[_IMAGE_GENERATION_PARAMETERS_EXIF_KEY] + return GeneratedImage( + image_bytes=base_image._image_bytes, # pylint: disable=protected-access + generation_parameters=generation_parameters, + ) + + def save(self, location: str, include_generation_parameters: bool = True): + """Saves image to a file. + + Args: + location: Local path where to save the image. + include_generation_parameters: Whether to include the image + generation parameters in the image's EXIF metadata. + """ + if include_generation_parameters: + if not self._generation_parameters: + raise ValueError("Image does not have generation parameters.") + if not PIL: + raise ValueError("The PIL module is required for saving generation parameters.") + + exif = self._pil_image.getexif() + exif[_EXIF_USER_COMMENT_TAG_IDX] = json.dumps( + {_IMAGE_GENERATION_PARAMETERS_EXIF_KEY: self._generation_parameters} + ) + self._pil_image.save(location, exif=exif) + else: + super().save(location=location) diff --git a/google/generativeai/vision_models/__init__.py b/google/generativeai/vision_models/__init__.py index 2a4a27e32..e1b62d39b 100644 --- a/google/generativeai/vision_models/__init__.py +++ b/google/generativeai/vision_models/__init__.py @@ -14,15 +14,15 @@ # """Classes for working with vision models.""" +from google.generativeai.types.image_types import check_watermark, Image, GeneratedImage + from google.generativeai.vision_models._vision_models import ( - check_watermark, - Image, - GeneratedImage, ImageGenerationModel, ImageGenerationResponse, ) __all__ = [ + "check_watermark", "Image", "GeneratedImage", "ImageGenerationModel", diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py index 52ec689a9..f89ab86e6 100644 --- a/google/generativeai/vision_models/_vision_models.py +++ b/google/generativeai/vision_models/_vision_models.py @@ -16,88 +16,12 @@ """Classes for working with vision models.""" import base64 -import collections import dataclasses -import io -import json -import os -import pathlib import typing -from typing import Any, Dict, List, Literal, Optional, Union +from typing import List, Literal, Optional from google.generativeai import client -from google.generativeai import protos -from google.generativeai.types import content_types - -from google.protobuf import struct_pb2 - -from proto.marshal.collections import maps -from proto.marshal.collections import repeated - - -# pylint: disable=g-import-not-at-top\ -if typing.TYPE_CHECKING: - from IPython import display as IPython_display -else: - try: - from IPython import display as IPython_display - except ImportError: - IPython_display = None - -if typing.TYPE_CHECKING: - import PIL.Image as PIL_Image -else: - try: - from PIL import Image as PIL_Image - except ImportError: - PIL_Image = None - - -# This is to get around https://github.com/googleapis/proto-plus-python/issues/488 -def to_value(value) -> struct_pb2.Value: - """Return a protobuf Value object representing this value.""" - if isinstance(value, struct_pb2.Value): - return value - if value is None: - return struct_pb2.Value(null_value=0) - if isinstance(value, bool): - return struct_pb2.Value(bool_value=value) - if isinstance(value, (int, float)): - return struct_pb2.Value(number_value=float(value)) - if isinstance(value, str): - return struct_pb2.Value(string_value=value) - if isinstance(value, collections.abc.Sequence): - return struct_pb2.Value(list_value=to_list_value(value)) - if isinstance(value, collections.abc.Mapping): - return struct_pb2.Value(struct_value=to_mapping_value(value)) - raise ValueError("Unable to coerce value: %r" % value) - - -def to_list_value(value) -> struct_pb2.ListValue: - # We got a proto, or else something we sent originally. - # Preserve the instance we have. - if isinstance(value, struct_pb2.ListValue): - return value - if isinstance(value, repeated.RepeatedComposite): - return struct_pb2.ListValue(values=[v for v in value.pb]) - - # We got a list (or something list-like); convert it. - return struct_pb2.ListValue(values=[to_value(v) for v in value]) - - -def to_mapping_value(value) -> struct_pb2.Struct: - # We got a proto, or else something we sent originally. - # Preserve the instance we have. - if isinstance(value, struct_pb2.Struct): - return value - if isinstance(value, maps.MapComposite): - return struct_pb2.Struct( - fields={k: v for k, v in value.pb.items()}, - ) - - # We got a dict (or something dict-like); convert it. - return struct_pb2.Struct(fields={k: to_value(v) for k, v in value.items()}) - +from google.generativeai.types import image_types AspectRatio = Literal["1:1", "9:16", "16:9", "4:3", "3:4"] ASPECT_RATIOS = AspectRatio.__args__ # type: ignore @@ -111,171 +35,6 @@ def to_mapping_value(value) -> struct_pb2.Struct: PersonGeneration = Literal["dont_allow", "allow_adult"] PERSON_GENERATIONS = PersonGeneration.__args__ # type: ignore -ImageLikeType = Union["Image", pathlib.Path, content_types.ImageType] - - -def check_watermark( - img: ImageLikeType, model_id: str = "models/image-verification-001" -) -> "CheckWatermarkResult": - """Checks if an image has a Google-AI watermark. - - Args: - img: can be a `pathlib.Path` or a `PIL.Image.Image`, `IPython.display.Image`, or `google.generativeai.Image`. - model_id: Which version of the image-verification model to send the image to. - - Returns: - - """ - if isinstance(img, Image): - pass - elif isinstance(img, pathlib.Path): - img = Image.load_from_file(img) - elif IPython_display is not None and isinstance(img, IPython_display.Image): - img = Image(image_bytes=img.data) - elif PIL_Image is not None and isinstance(img, PIL_Image.Image): - blob = content_types._pil_to_blob(img) - img = Image(image_bytes=blob.data) - elif isinstance(img, protos.Blob): - img = Image(image_bytes=img.data) - else: - raise TypeError( - f"Not implemented: Could not convert a {type(img)} into `Image`\n {img=}" - ) - - prediction_client = client.get_default_prediction_client() - if not model_id.startswith("models/"): - model_id = f"models/{model_id}" - - instance = {"image": {"bytesBase64Encoded": base64.b64encode(img._loaded_bytes).decode()}} - parameters = {"watermarkVerification": True} - - # This is to get around https://github.com/googleapis/proto-plus-python/issues/488 - pr = protos.PredictRequest.pb() - request = pr(model=model_id, instances=[to_value(instance)], parameters=to_value(parameters)) - - response = prediction_client.predict(request) - - return CheckWatermarkResult(response.predictions) - - -class Image: - """Image.""" - - __module__ = "vertexai.vision_models" - - _loaded_bytes: Optional[bytes] = None - _loaded_image: Optional["PIL_Image.Image"] = None - - def __init__( - self, - image_bytes: Optional[bytes], - ): - """Creates an `Image` object. - - Args: - image_bytes: Image file bytes. Image can be in PNG or JPEG format. - """ - self._image_bytes = image_bytes - - @staticmethod - def load_from_file(location: os.PathLike) -> "Image": - """Loads image from local file or Google Cloud Storage. - - Args: - location: Local path or Google Cloud Storage uri from where to load - the image. - - Returns: - Loaded image as an `Image` object. - """ - # Load image from local path - image_bytes = pathlib.Path(location).read_bytes() - image = Image(image_bytes=image_bytes) - return image - - @property - def _image_bytes(self) -> bytes: - return self._loaded_bytes - - @_image_bytes.setter - def _image_bytes(self, value: bytes): - self._loaded_bytes = value - - @property - def _pil_image(self) -> "PIL_Image.Image": # type: ignore - if self._loaded_image is None: - if not PIL_Image: - raise RuntimeError( - "The PIL module is not available. Please install the Pillow package." - ) - self._loaded_image = PIL_Image.open(io.BytesIO(self._image_bytes)) - return self._loaded_image - - @property - def _size(self): - return self._pil_image.size - - @property - def _mime_type(self) -> str: - """Returns the MIME type of the image.""" - if PIL_Image: - return PIL_Image.MIME.get(self._pil_image.format, "image/jpeg") - # Fall back to jpeg - return "image/jpeg" - - def show(self): - """Shows the image. - - This method only works when in a notebook environment. - """ - if PIL_Image and IPython_display: - IPython_display.display(self._pil_image) - - def save(self, location: str): - """Saves image to a file. - - Args: - location: Local path where to save the image. - """ - pathlib.Path(location).write_bytes(self._image_bytes) - - def _as_base64_string(self) -> str: - """Encodes image using the base64 encoding. - - Returns: - Base64 encoding of the image as a string. - """ - # ! b64encode returns `bytes` object, not `str`. - # We need to convert `bytes` to `str`, otherwise we get service error: - # "received initial metadata size exceeds limit" - return base64.b64encode(self._image_bytes).decode("ascii") - - def _repr_png_(self): - return self._pil_image._repr_png_() # type:ignore - - check_watermark = check_watermark - - -class CheckWatermarkResult: - def __init__(self, predictions): - self._predictions = predictions - - @property - def decision(self): - return self._predictions[0]["decision"] - - def __str__(self): - return f"CheckWatermarkResult([{{'decision': {self.decision!r}}}])" - - def __bool__(self): - decision = self.decision - if decision == "ACCEPT": - return True - elif decision == "REJECT": - return False - else: - raise ValueError(f"Unrecognized result: {decision}") - class ImageGenerationModel: """Generates images from text prompt. @@ -417,20 +176,16 @@ def _generate_images( parameters["personGeneration"] = person_generation shared_generation_parameters["person_generation"] = person_generation - # This is to get around https://github.com/googleapis/proto-plus-python/issues/488 - pr = protos.PredictRequest.pb() - request = pr( - model=self.model_name, instances=[to_value(instance)], parameters=to_value(parameters) + response = self._client.predict( + model=self.model_name, instances=[instance], parameters=parameters ) - response = self._client.predict(request) - - generated_images: List["GeneratedImage"] = [] + generated_images: List[image_types.GeneratedImage] = [] for idx, prediction in enumerate(response.predictions): generation_parameters = dict(shared_generation_parameters) generation_parameters["index_of_image_in_batch"] = idx encoded_bytes = prediction.get("bytesBase64Encoded") - generated_image = GeneratedImage( + generated_image = image_types.GeneratedImage( image_bytes=base64.b64decode(encoded_bytes) if encoded_bytes else None, generation_parameters=generation_parameters, ) @@ -507,84 +262,12 @@ class ImageGenerationResponse: __module__ = "vertexai.preview.vision_models" - images: List["GeneratedImage"] + images: List[image_types.GeneratedImage] - def __iter__(self) -> typing.Iterator["GeneratedImage"]: + def __iter__(self) -> typing.Iterator[image_types.GeneratedImage]: """Iterates through the generated images.""" yield from self.images - def __getitem__(self, idx: int) -> "GeneratedImage": + def __getitem__(self, idx: int) -> image_types.GeneratedImage: """Gets the generated image by index.""" return self.images[idx] - - -_EXIF_USER_COMMENT_TAG_IDX = 0x9286 -_IMAGE_GENERATION_PARAMETERS_EXIF_KEY = ( - "google.cloud.vertexai.image_generation.image_generation_parameters" -) - - -class GeneratedImage(Image): - """Generated image.""" - - __module__ = "google.generativeai" - - def __init__( - self, - image_bytes: Optional[bytes], - generation_parameters: Dict[str, Any], - ): - """Creates a `GeneratedImage` object. - - Args: - image_bytes: Image file bytes. Image can be in PNG or JPEG format. - generation_parameters: Image generation parameter values. - """ - super().__init__(image_bytes=image_bytes) - self._generation_parameters = generation_parameters - - @property - def generation_parameters(self): - """Image generation parameters as a dictionary.""" - return self._generation_parameters - - @staticmethod - def load_from_file(location: os.PathLike) -> "GeneratedImage": - """Loads image from file. - - Args: - location: Local path from where to load the image. - - Returns: - Loaded image as a `GeneratedImage` object. - """ - base_image = Image.load_from_file(location=location) - exif = base_image._pil_image.getexif() # pylint: disable=protected-access - exif_comment_dict = json.loads(exif[_EXIF_USER_COMMENT_TAG_IDX]) - generation_parameters = exif_comment_dict[_IMAGE_GENERATION_PARAMETERS_EXIF_KEY] - return GeneratedImage( - image_bytes=base_image._image_bytes, # pylint: disable=protected-access - generation_parameters=generation_parameters, - ) - - def save(self, location: str, include_generation_parameters: bool = True): - """Saves image to a file. - - Args: - location: Local path where to save the image. - include_generation_parameters: Whether to include the image - generation parameters in the image's EXIF metadata. - """ - if include_generation_parameters: - if not self._generation_parameters: - raise ValueError("Image does not have generation parameters.") - if not PIL_Image: - raise ValueError("The PIL module is required for saving generation parameters.") - - exif = self._pil_image.getexif() - exif[_EXIF_USER_COMMENT_TAG_IDX] = json.dumps( - {_IMAGE_GENERATION_PARAMETERS_EXIF_KEY: self._generation_parameters} - ) - self._pil_image.save(location, exif=exif) - else: - super().save(location=location) diff --git a/tests/test_content.py b/tests/test_content.py index 2031e40ae..8bec14a9c 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -22,6 +22,8 @@ from absl.testing import parameterized from google.generativeai import protos from google.generativeai.types import content_types +from google.generativeai.types import image_types +from google.generativeai.types.image_types import _image_types import IPython.display import PIL.Image @@ -90,7 +92,7 @@ class UnitTests(parameterized.TestCase): ["P", PIL.Image.fromarray(np.zeros([6, 6, 3], dtype=np.uint8)).convert("P")], ) def test_numpy_to_blob(self, image): - blob = content_types.image_to_blob(image) + blob = _image_types.image_to_blob(image) self.assertIsInstance(blob, protos.Blob) self.assertEqual(blob.mime_type, "image/webp") self.assertStartsWith(blob.data, b"RIFF \x00\x00\x00WEBPVP8L") @@ -98,9 +100,10 @@ def test_numpy_to_blob(self, image): @parameterized.named_parameters( ["PIL", PIL.Image.open(TEST_PNG_PATH)], ["IPython", IPython.display.Image(filename=TEST_PNG_PATH)], + ["image_types.Image", image_types.Image.load_from_file(TEST_PNG_PATH)], ) def test_png_to_blob(self, image): - blob = content_types.image_to_blob(image) + blob = _image_types.image_to_blob(image) self.assertIsInstance(blob, protos.Blob) self.assertEqual(blob.mime_type, "image/png") self.assertStartsWith(blob.data, b"\x89PNG") @@ -108,9 +111,10 @@ def test_png_to_blob(self, image): @parameterized.named_parameters( ["PIL", PIL.Image.open(TEST_JPG_PATH)], ["IPython", IPython.display.Image(filename=TEST_JPG_PATH)], + ["image_types.Image", image_types.Image.load_from_file(TEST_JPG_PATH)], ) def test_jpg_to_blob(self, image): - blob = content_types.image_to_blob(image) + blob = _image_types.image_to_blob(image) self.assertIsInstance(blob, protos.Blob) self.assertEqual(blob.mime_type, "image/jpeg") self.assertStartsWith(blob.data, b"\xff\xd8\xff\xe0\x00\x10JFIF") @@ -118,9 +122,10 @@ def test_jpg_to_blob(self, image): @parameterized.named_parameters( ["PIL", PIL.Image.open(TEST_GIF_PATH)], ["IPython", IPython.display.Image(filename=TEST_GIF_PATH)], + ["image_types.Image", image_types.Image.load_from_file(TEST_GIF_PATH)], ) def test_gif_to_blob(self, image): - blob = content_types.image_to_blob(image) + blob = _image_types.image_to_blob(image) self.assertIsInstance(blob, protos.Blob) self.assertEqual(blob.mime_type, "image/gif") self.assertStartsWith(blob.data, b"GIF87a") From 2786f33edbabc441b247e549025b3e9e5e849895 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 12 Nov 2024 11:31:49 -0800 Subject: [PATCH 6/6] Remove watermark (#631) * Remove watermark Change-Id: Ie8f26fb7c4eb26c4cf7bdd746b608a652dfbd5a6 * format Change-Id: Id839dd464fbb8413f4a07603d8f6af1626353b7a --- .../types/image_types/_image_types.py | 47 +------------------ google/generativeai/vision_models/__init__.py | 3 +- 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/google/generativeai/types/image_types/_image_types.py b/google/generativeai/types/image_types/_image_types.py index ddfea057f..f23c343aa 100644 --- a/google/generativeai/types/image_types/_image_types.py +++ b/google/generativeai/types/image_types/_image_types.py @@ -8,7 +8,6 @@ from typing import Any, Dict, Optional, Union from google.generativeai import protos -from google.generativeai import client # pylint: disable=g-import-not-at-top if typing.TYPE_CHECKING: @@ -38,7 +37,7 @@ ImageType = Union["Image", "PIL.Image.Image", "IPython.display.Image"] # pylint: enable=g-import-not-at-top -__all__ = ["Image", "GeneratedImage", "check_watermark", "CheckWatermarkResult", "ImageType"] +__all__ = ["Image", "GeneratedImage", "ImageType"] def _pil_to_blob(image: PIL.Image.Image) -> protos.Blob: @@ -121,48 +120,6 @@ def __bool__(self): raise ValueError(f"Unrecognized result: {decision}") -def check_watermark( - img: Union[pathlib.Path, ImageType], model_id: str = "models/image-verification-001" -) -> "CheckWatermarkResult": - """Checks if an image has a Google-AI watermark. - - Args: - img: can be a `pathlib.Path` or a `PIL.Image.Image`, `IPython.display.Image`, or `google.generativeai.Image`. - model_id: Which version of the image-verification model to send the image to. - - Returns: - - """ - if isinstance(img, Image): - pass - elif isinstance(img, pathlib.Path): - img = Image.load_from_file(img) - elif IPython.display is not None and isinstance(img, IPython.display.Image): - img = Image(image_bytes=img.data) - elif PIL.Image is not None and isinstance(img, PIL.Image.Image): - blob = _pil_to_blob(img) - img = Image(image_bytes=blob.data) - elif isinstance(img, protos.Blob): - img = Image(image_bytes=img.data) - else: - raise TypeError( - f"Not implemented: Could not convert a {type(img)} into `Image`\n {img=}" - ) - - prediction_client = client.get_default_prediction_client() - if not model_id.startswith("models/"): - model_id = f"models/{model_id}" - - instance = {"image": {"bytesBase64Encoded": base64.b64encode(img._loaded_bytes).decode()}} - parameters = {"watermarkVerification": True} - - response = prediction_client.predict( - model=model_id, instances=[instance], parameters=parameters - ) - - return CheckWatermarkResult(response.predictions) - - class Image: """Image.""" @@ -257,8 +214,6 @@ def _as_base64_string(self) -> str: def _repr_png_(self): return self._pil_image._repr_png_() # type:ignore - check_watermark = check_watermark - _EXIF_USER_COMMENT_TAG_IDX = 0x9286 _IMAGE_GENERATION_PARAMETERS_EXIF_KEY = ( diff --git a/google/generativeai/vision_models/__init__.py b/google/generativeai/vision_models/__init__.py index e1b62d39b..c0ab97b9c 100644 --- a/google/generativeai/vision_models/__init__.py +++ b/google/generativeai/vision_models/__init__.py @@ -14,7 +14,7 @@ # """Classes for working with vision models.""" -from google.generativeai.types.image_types import check_watermark, Image, GeneratedImage +from google.generativeai.types.image_types import Image, GeneratedImage from google.generativeai.vision_models._vision_models import ( ImageGenerationModel, @@ -22,7 +22,6 @@ ) __all__ = [ - "check_watermark", "Image", "GeneratedImage", "ImageGenerationModel",