diff --git a/packages/google-cloud-documentai/CONTRIBUTING.rst b/packages/google-cloud-documentai/CONTRIBUTING.rst index 2b320e2af7b3..45855b998443 100644 --- a/packages/google-cloud-documentai/CONTRIBUTING.rst +++ b/packages/google-cloud-documentai/CONTRIBUTING.rst @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system- -- -k + $ nox -s system-3.11 -- -k .. note:: - System tests are only configured to run under Python. + System tests are only configured to run under Python 3.8, 3.9, 3.10 and 3.11. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local diff --git a/packages/google-cloud-documentai/docs/documentai_v1beta3/document_service.rst b/packages/google-cloud-documentai/docs/documentai_v1beta3/document_service.rst index 85f24b292a51..4ac25775f8b6 100644 --- a/packages/google-cloud-documentai/docs/documentai_v1beta3/document_service.rst +++ b/packages/google-cloud-documentai/docs/documentai_v1beta3/document_service.rst @@ -4,3 +4,7 @@ DocumentService .. automodule:: google.cloud.documentai_v1beta3.services.document_service :members: :inherited-members: + +.. automodule:: google.cloud.documentai_v1beta3.services.document_service.pagers + :members: + :inherited-members: diff --git a/packages/google-cloud-documentai/google/cloud/documentai/gapic_version.py b/packages/google-cloud-documentai/google/cloud/documentai/gapic_version.py index 897a8fc0d7c8..360a0d13ebdd 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai/gapic_version.py +++ b/packages/google-cloud-documentai/google/cloud/documentai/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.19.0" # {x-release-please-version} +__version__ = "0.0.0" # {x-release-please-version} diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1/gapic_version.py b/packages/google-cloud-documentai/google/cloud/documentai_v1/gapic_version.py index 897a8fc0d7c8..360a0d13ebdd 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1/gapic_version.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.19.0" # {x-release-please-version} +__version__ = "0.0.0" # {x-release-please-version} diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_io.py b/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_io.py index 8f7659380a5a..493827a72442 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_io.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_io.py @@ -48,7 +48,7 @@ class RawDocument(proto.Message): The display name of the document, it supports all Unicode characters except the following: ``*``, ``?``, ``[``, ``]``, ``%``, ``{``, ``}``,\ ``'``, ``\"``, ``,`` ``~``, ``=`` and - ``:`` are reserved. If not specified, a default ID will be + ``:`` are reserved. If not specified, a default ID is generated. """ @@ -260,8 +260,16 @@ class OcrConfig(proto.Message): Includes symbol level OCR information if set to true. compute_style_info (bool): - Turn on font id model and returns font style information. - Use PremiumFeatures.compute_style_info instead. 
+ Turn on font identification model and return font style + information. Deprecated, use + [PremiumFeatures.compute_style_info][google.cloud.documentai.v1.OcrConfig.PremiumFeatures.compute_style_info] + instead. + disable_character_boxes_detection (bool): + Turn off character box detector in OCR + engine. Character box detection is enabled by + default in OCR 2.0+ processors. + premium_features (google.cloud.documentai_v1.types.OcrConfig.PremiumFeatures): + Configurations for premium OCR features. """ class Hints(proto.Message): @@ -285,6 +293,34 @@ class Hints(proto.Message): number=1, ) + class PremiumFeatures(proto.Message): + r"""Configurations for premium OCR features. + + Attributes: + enable_selection_mark_detection (bool): + Turn on selection mark detector in OCR + engine. Only available in OCR 2.0+ processors. + compute_style_info (bool): + Turn on font identification model and return + font style information. + enable_math_ocr (bool): + Turn on the model that can extract LaTeX math + formulas. + """ + + enable_selection_mark_detection: bool = proto.Field( + proto.BOOL, + number=3, + ) + compute_style_info: bool = proto.Field( + proto.BOOL, + number=4, + ) + enable_math_ocr: bool = proto.Field( + proto.BOOL, + number=5, + ) + hints: Hints = proto.Field( proto.MESSAGE, number=2, @@ -310,6 +346,15 @@ class Hints(proto.Message): proto.BOOL, number=8, ) + disable_character_boxes_detection: bool = proto.Field( + proto.BOOL, + number=10, + ) + premium_features: PremiumFeatures = proto.Field( + proto.MESSAGE, + number=11, + message=PremiumFeatures, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_processor_service.py b/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_processor_service.py index 12fe1b1c7ed6..4a3c8dbeb9ff 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_processor_service.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1/types/document_processor_service.py @@ -90,12 +90,63 @@ class ProcessOptions(proto.Message): r"""Options for Process API + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: + individual_page_selector (google.cloud.documentai_v1.types.ProcessOptions.IndividualPageSelector): + Which pages to process (1-indexed). + + This field is a member of `oneof`_ ``page_range``. + from_start (int): + Only process certain pages from the start. + Process all if the document has fewer pages. + + This field is a member of `oneof`_ ``page_range``. + from_end (int): + Only process certain pages from the end, same + as above. + + This field is a member of `oneof`_ ``page_range``. ocr_config (google.cloud.documentai_v1.types.OcrConfig): Only applicable to ``OCR_PROCESSOR``. Returns error if set on other processor types. """ + class IndividualPageSelector(proto.Message): + r"""A list of individual page numbers. + + Attributes: + pages (MutableSequence[int]): + Optional. Indices of the pages (starting from + 1). 
+ """ + + pages: MutableSequence[int] = proto.RepeatedField( + proto.INT32, + number=1, + ) + + individual_page_selector: IndividualPageSelector = proto.Field( + proto.MESSAGE, + number=5, + oneof="page_range", + message=IndividualPageSelector, + ) + from_start: int = proto.Field( + proto.INT32, + number=6, + oneof="page_range", + ) + from_end: int = proto.Field( + proto.INT32, + number=7, + oneof="page_range", + ) ocr_config: document_io.OcrConfig = proto.Field( proto.MESSAGE, number=1, diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta2/gapic_version.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta2/gapic_version.py index 897a8fc0d7c8..360a0d13ebdd 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta2/gapic_version.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.19.0" # {x-release-please-version} +__version__ = "0.0.0" # {x-release-please-version} diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/__init__.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/__init__.py index 64985136bc85..dd7585668b97 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/__init__.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/__init__.py @@ -90,12 +90,20 @@ UndeployProcessorVersionRequest, UndeployProcessorVersionResponse, ) -from .types.document_schema import DocumentSchema, EntityTypeMetadata, PropertyMetadata +from .types.document_schema import ( + DocumentSchema, + EntityTypeMetadata, + FieldExtractionMetadata, + PropertyMetadata, + SummaryOptions, +) from .types.document_service import ( BatchDeleteDocumentsMetadata, BatchDeleteDocumentsRequest, BatchDeleteDocumentsResponse, DatasetSplitType, + DocumentLabelingState, + DocumentMetadata, DocumentPageRange, GetDatasetSchemaRequest, GetDocumentRequest, @@ -103,6 +111,8 @@ ImportDocumentsMetadata, ImportDocumentsRequest, ImportDocumentsResponse, + ListDocumentsRequest, + ListDocumentsResponse, UpdateDatasetOperationMetadata, UpdateDatasetRequest, UpdateDatasetSchemaRequest, @@ -110,7 +120,7 @@ from .types.evaluation import Evaluation, EvaluationReference from .types.geometry import BoundingPoly, NormalizedVertex, Vertex from .types.operation_metadata import CommonOperationMetadata -from .types.processor import Processor, ProcessorVersion +from .types.processor import Processor, ProcessorVersion, ProcessorVersionAlias from .types.processor_type import ProcessorType __all__ = ( @@ -143,6 +153,8 @@ "DisableProcessorResponse", "Document", "DocumentId", + "DocumentLabelingState", + "DocumentMetadata", "DocumentOutputConfig", "DocumentPageRange", "DocumentProcessorServiceClient", @@ -159,6 +171,7 @@ "EvaluationReference", "FetchProcessorTypesRequest", "FetchProcessorTypesResponse", + "FieldExtractionMetadata", "GcsDocument", "GcsDocuments", "GcsPrefix", @@ -176,6 +189,8 @@ "ImportProcessorVersionMetadata", "ImportProcessorVersionRequest", "ImportProcessorVersionResponse", + "ListDocumentsRequest", + "ListDocumentsResponse", "ListEvaluationsRequest", "ListEvaluationsResponse", "ListProcessorTypesRequest", @@ -192,6 +207,7 @@ "Processor", "ProcessorType", "ProcessorVersion", + "ProcessorVersionAlias", "PropertyMetadata", "RawDocument", "ReviewDocumentOperationMetadata", @@ -201,6 +217,7 @@ 
"SetDefaultProcessorVersionMetadata", "SetDefaultProcessorVersionRequest", "SetDefaultProcessorVersionResponse", + "SummaryOptions", "TrainProcessorVersionMetadata", "TrainProcessorVersionRequest", "TrainProcessorVersionResponse", diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_metadata.json b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_metadata.json index 7853ba97a3b6..9d32da0a443b 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_metadata.json +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_metadata.json @@ -394,6 +394,11 @@ "import_documents" ] }, + "ListDocuments": { + "methods": [ + "list_documents" + ] + }, "UpdateDataset": { "methods": [ "update_dataset" @@ -429,6 +434,11 @@ "import_documents" ] }, + "ListDocuments": { + "methods": [ + "list_documents" + ] + }, "UpdateDataset": { "methods": [ "update_dataset" @@ -464,6 +474,11 @@ "import_documents" ] }, + "ListDocuments": { + "methods": [ + "list_documents" + ] + }, "UpdateDataset": { "methods": [ "update_dataset" diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_version.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_version.py index 897a8fc0d7c8..360a0d13ebdd 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_version.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.19.0" # {x-release-please-version} +__version__ = "0.0.0" # {x-release-please-version} diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py index a0c715827ead..9a57bb3aa367 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py @@ -2918,21 +2918,20 @@ async def sample_import_processor_version(): request (Optional[Union[google.cloud.documentai_v1beta3.types.ImportProcessorVersionRequest, dict]]): The request object. The request message for the [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] - method. Requirements: + method. - - The Document AI `Service - Agent `__ - of the destination project must have `Document AI - Editor - role `__ - on the source project. + The Document AI `Service + Agent `__ + of the destination project must have `Document AI Editor + role `__ + on the source project. The destination project is specified as part of the [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent] field. The source project is specified as part of the - [source][ImportProcessorVersionRequest.processor_version_source + [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source] or - ImportProcessorVersionRequest.external_processor_version_source] + [external_processor_version_source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.external_processor_version_source] field. parent (:class:`str`): Required. 
The destination processor name to create the diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/client.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/client.py index dbf6d367a0a6..17711be9b4a8 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/client.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/client.py @@ -3217,21 +3217,20 @@ def sample_import_processor_version(): request (Union[google.cloud.documentai_v1beta3.types.ImportProcessorVersionRequest, dict]): The request object. The request message for the [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] - method. Requirements: + method. - - The Document AI `Service - Agent `__ - of the destination project must have `Document AI - Editor - role `__ - on the source project. + The Document AI `Service + Agent `__ + of the destination project must have `Document AI Editor + role `__ + on the source project. The destination project is specified as part of the [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent] field. The source project is specified as part of the - [source][ImportProcessorVersionRequest.processor_version_source + [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source] or - ImportProcessorVersionRequest.external_processor_version_source] + [external_processor_version_source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.external_processor_version_source] field. parent (str): Required. The destination processor name to create the diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/transports/rest.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/transports/rest.py index ef24fde61238..9c72f032dd23 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/transports/rest.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/transports/rest.py @@ -2414,21 +2414,20 @@ def __call__( request (~.document_processor_service.ImportProcessorVersionRequest): The request object. The request message for the [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] - method. Requirements: + method. - - The Document AI `Service - Agent `__ - of the destination project must have `Document AI - Editor - role `__ - on the source project. + The Document AI `Service + Agent `__ + of the destination project must have `Document AI Editor + role `__ + on the source project. The destination project is specified as part of the [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent] field. The source project is specified as part of the - [source][ImportProcessorVersionRequest.processor_version_source + [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source] or - ImportProcessorVersionRequest.external_processor_version_source] + [external_processor_version_source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.external_processor_version_source] field. 
retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/async_client.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/async_client.py index ee5c56fa6de3..200961fededd 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/async_client.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/async_client.py @@ -48,6 +48,7 @@ from google.longrunning import operations_pb2 # type: ignore from google.protobuf import field_mask_pb2 # type: ignore +from google.cloud.documentai_v1beta3.services.document_service import pagers from google.cloud.documentai_v1beta3.types import ( document, document_schema, @@ -593,6 +594,120 @@ async def sample_get_document(): # Done; return the response. return response + async def list_documents( + self, + request: Optional[Union[document_service.ListDocumentsRequest, dict]] = None, + *, + dataset: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> pagers.ListDocumentsAsyncPager: + r"""Returns a list of documents present in the dataset. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google.cloud import documentai_v1beta3 + + async def sample_list_documents(): + # Create a client + client = documentai_v1beta3.DocumentServiceAsyncClient() + + # Initialize request argument(s) + request = documentai_v1beta3.ListDocumentsRequest( + dataset="dataset_value", + ) + + # Make the request + page_result = client.list_documents(request=request) + + # Handle the response + async for response in page_result: + print(response) + + Args: + request (Optional[Union[google.cloud.documentai_v1beta3.types.ListDocumentsRequest, dict]]): + The request object. + dataset (:class:`str`): + Required. The resource name of the + dataset to be listed. Format: + + projects/{project}/locations/{location}/processors/{processor}/dataset + + This corresponds to the ``dataset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.documentai_v1beta3.services.document_service.pagers.ListDocumentsAsyncPager: + Iterating over this object will yield + results and resolve additional pages + automatically. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([dataset]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." 
+ ) + + request = document_service.ListDocumentsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if dataset is not None: + request.dataset = dataset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.list_documents, + default_timeout=None, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("dataset", request.dataset),)), + ) + + # Send the request. + response = await rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # This method is paged; wrap the response in a pager, which provides + # an `__aiter__` convenience method. + response = pagers.ListDocumentsAsyncPager( + method=rpc, + request=request, + response=response, + metadata=metadata, + ) + + # Done; return the response. + return response + async def batch_delete_documents( self, request: Optional[ diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/client.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/client.py index 25f850303730..93cd6cd6b57e 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/client.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/client.py @@ -52,6 +52,7 @@ from google.longrunning import operations_pb2 # type: ignore from google.protobuf import field_mask_pb2 # type: ignore +from google.cloud.documentai_v1beta3.services.document_service import pagers from google.cloud.documentai_v1beta3.types import ( document, document_schema, @@ -860,6 +861,120 @@ def sample_get_document(): # Done; return the response. return response + def list_documents( + self, + request: Optional[Union[document_service.ListDocumentsRequest, dict]] = None, + *, + dataset: Optional[str] = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Union[float, object] = gapic_v1.method.DEFAULT, + metadata: Sequence[Tuple[str, str]] = (), + ) -> pagers.ListDocumentsPager: + r"""Returns a list of documents present in the dataset. + + .. code-block:: python + + # This snippet has been automatically generated and should be regarded as a + # code template only. + # It will require modifications to work: + # - It may require correct/in-range values for request initialization. + # - It may require specifying regional endpoints when creating the service + # client as shown in: + # https://googleapis.dev/python/google-api-core/latest/client_options.html + from google.cloud import documentai_v1beta3 + + def sample_list_documents(): + # Create a client + client = documentai_v1beta3.DocumentServiceClient() + + # Initialize request argument(s) + request = documentai_v1beta3.ListDocumentsRequest( + dataset="dataset_value", + ) + + # Make the request + page_result = client.list_documents(request=request) + + # Handle the response + for response in page_result: + print(response) + + Args: + request (Union[google.cloud.documentai_v1beta3.types.ListDocumentsRequest, dict]): + The request object. + dataset (str): + Required. The resource name of the + dataset to be listed. 
Format: + + projects/{project}/locations/{location}/processors/{processor}/dataset + + This corresponds to the ``dataset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.documentai_v1beta3.services.document_service.pagers.ListDocumentsPager: + Iterating over this object will yield + results and resolve additional pages + automatically. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([dataset]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a document_service.ListDocumentsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, document_service.ListDocumentsRequest): + request = document_service.ListDocumentsRequest(request) + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if dataset is not None: + request.dataset = dataset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.list_documents] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("dataset", request.dataset),)), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # This method is paged; wrap the response in a pager, which provides + # an `__iter__` convenience method. + response = pagers.ListDocumentsPager( + method=rpc, + request=request, + response=response, + metadata=metadata, + ) + + # Done; return the response. + return response + def batch_delete_documents( self, request: Optional[ diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/pagers.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/pagers.py new file mode 100644 index 000000000000..f91242404854 --- /dev/null +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/pagers.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from typing import ( + Any, + AsyncIterator, + Awaitable, + Callable, + Iterator, + Optional, + Sequence, + Tuple, +) + +from google.cloud.documentai_v1beta3.types import document_service + + +class ListDocumentsPager: + """A pager for iterating through ``list_documents`` requests. + + This class thinly wraps an initial + :class:`google.cloud.documentai_v1beta3.types.ListDocumentsResponse` object, and + provides an ``__iter__`` method to iterate through its + ``document_metadata`` field. + + If there are more pages, the ``__iter__`` method will make additional + ``ListDocuments`` requests and continue to iterate + through the ``document_metadata`` field on the + corresponding responses. + + All the usual :class:`google.cloud.documentai_v1beta3.types.ListDocumentsResponse` + attributes are available on the pager. If multiple requests are made, only + the most recent response is retained, and thus used for attribute lookup. + """ + + def __init__( + self, + method: Callable[..., document_service.ListDocumentsResponse], + request: document_service.ListDocumentsRequest, + response: document_service.ListDocumentsResponse, + *, + metadata: Sequence[Tuple[str, str]] = () + ): + """Instantiate the pager. + + Args: + method (Callable): The method that was originally called, and + which instantiated this pager. + request (google.cloud.documentai_v1beta3.types.ListDocumentsRequest): + The initial request object. + response (google.cloud.documentai_v1beta3.types.ListDocumentsResponse): + The initial response object. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + """ + self._method = method + self._request = document_service.ListDocumentsRequest(request) + self._response = response + self._metadata = metadata + + def __getattr__(self, name: str) -> Any: + return getattr(self._response, name) + + @property + def pages(self) -> Iterator[document_service.ListDocumentsResponse]: + yield self._response + while self._response.next_page_token: + self._request.page_token = self._response.next_page_token + self._response = self._method(self._request, metadata=self._metadata) + yield self._response + + def __iter__(self) -> Iterator[document_service.DocumentMetadata]: + for page in self.pages: + yield from page.document_metadata + + def __repr__(self) -> str: + return "{0}<{1!r}>".format(self.__class__.__name__, self._response) + + +class ListDocumentsAsyncPager: + """A pager for iterating through ``list_documents`` requests. + + This class thinly wraps an initial + :class:`google.cloud.documentai_v1beta3.types.ListDocumentsResponse` object, and + provides an ``__aiter__`` method to iterate through its + ``document_metadata`` field. + + If there are more pages, the ``__aiter__`` method will make additional + ``ListDocuments`` requests and continue to iterate + through the ``document_metadata`` field on the + corresponding responses. + + All the usual :class:`google.cloud.documentai_v1beta3.types.ListDocumentsResponse` + attributes are available on the pager. If multiple requests are made, only + the most recent response is retained, and thus used for attribute lookup. + """ + + def __init__( + self, + method: Callable[..., Awaitable[document_service.ListDocumentsResponse]], + request: document_service.ListDocumentsRequest, + response: document_service.ListDocumentsResponse, + *, + metadata: Sequence[Tuple[str, str]] = () + ): + """Instantiates the pager. 
+ + Args: + method (Callable): The method that was originally called, and + which instantiated this pager. + request (google.cloud.documentai_v1beta3.types.ListDocumentsRequest): + The initial request object. + response (google.cloud.documentai_v1beta3.types.ListDocumentsResponse): + The initial response object. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + """ + self._method = method + self._request = document_service.ListDocumentsRequest(request) + self._response = response + self._metadata = metadata + + def __getattr__(self, name: str) -> Any: + return getattr(self._response, name) + + @property + async def pages(self) -> AsyncIterator[document_service.ListDocumentsResponse]: + yield self._response + while self._response.next_page_token: + self._request.page_token = self._response.next_page_token + self._response = await self._method(self._request, metadata=self._metadata) + yield self._response + + def __aiter__(self) -> AsyncIterator[document_service.DocumentMetadata]: + async def async_generator(): + async for page in self.pages: + for response in page.document_metadata: + yield response + + return async_generator() + + def __repr__(self) -> str: + return "{0}<{1!r}>".format(self.__class__.__name__, self._response) diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/base.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/base.py index fb2d99970509..04e054ff1e7e 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/base.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/base.py @@ -139,6 +139,11 @@ def _prep_wrapped_messages(self, client_info): default_timeout=None, client_info=client_info, ), + self.list_documents: gapic_v1.method.wrap_method( + self.list_documents, + default_timeout=None, + client_info=client_info, + ), self.batch_delete_documents: gapic_v1.method.wrap_method( self.batch_delete_documents, default_timeout=None, @@ -200,6 +205,18 @@ def get_document( ]: raise NotImplementedError() + @property + def list_documents( + self, + ) -> Callable[ + [document_service.ListDocumentsRequest], + Union[ + document_service.ListDocumentsResponse, + Awaitable[document_service.ListDocumentsResponse], + ], + ]: + raise NotImplementedError() + @property def batch_delete_documents( self, diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc.py index f916e8c50ac4..b33bbdc21fe0 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc.py @@ -327,6 +327,34 @@ def get_document( ) return self._stubs["get_document"] + @property + def list_documents( + self, + ) -> Callable[ + [document_service.ListDocumentsRequest], document_service.ListDocumentsResponse + ]: + r"""Return a callable for the list documents method over gRPC. + + Returns a list of documents present in the dataset. + + Returns: + Callable[[~.ListDocumentsRequest], + ~.ListDocumentsResponse]: + A function that, when called, will call the underlying RPC + on the server. 
+ """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "list_documents" not in self._stubs: + self._stubs["list_documents"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1beta3.DocumentService/ListDocuments", + request_serializer=document_service.ListDocumentsRequest.serialize, + response_deserializer=document_service.ListDocumentsResponse.deserialize, + ) + return self._stubs["list_documents"] + @property def batch_delete_documents( self, diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc_asyncio.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc_asyncio.py index 5aaca005ed24..8ecc480bcc64 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc_asyncio.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/grpc_asyncio.py @@ -337,6 +337,35 @@ def get_document( ) return self._stubs["get_document"] + @property + def list_documents( + self, + ) -> Callable[ + [document_service.ListDocumentsRequest], + Awaitable[document_service.ListDocumentsResponse], + ]: + r"""Return a callable for the list documents method over gRPC. + + Returns a list of documents present in the dataset. + + Returns: + Callable[[~.ListDocumentsRequest], + Awaitable[~.ListDocumentsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "list_documents" not in self._stubs: + self._stubs["list_documents"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1beta3.DocumentService/ListDocuments", + request_serializer=document_service.ListDocumentsRequest.serialize, + response_deserializer=document_service.ListDocumentsResponse.deserialize, + ) + return self._stubs["list_documents"] + @property def batch_delete_documents( self, diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/rest.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/rest.py index aa2719ec71f0..337947072d2f 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/rest.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_service/transports/rest.py @@ -104,6 +104,14 @@ def post_import_documents(self, response): logging.log(f"Received response: {response}") return response + def pre_list_documents(self, request, metadata): + logging.log(f"Received request: {request}") + return request, metadata + + def post_list_documents(self, response): + logging.log(f"Received response: {response}") + return response + def pre_update_dataset(self, request, metadata): logging.log(f"Received request: {request}") return request, metadata @@ -218,6 +226,29 @@ def post_import_documents( """ return response + def pre_list_documents( + self, + request: document_service.ListDocumentsRequest, + metadata: Sequence[Tuple[str, str]], + ) -> Tuple[document_service.ListDocumentsRequest, Sequence[Tuple[str, str]]]: + """Pre-rpc interceptor for list_documents + + Override in a subclass to manipulate the request or metadata + before they are sent to the DocumentService server. + """ + return request, metadata + + def post_list_documents( + self, response: document_service.ListDocumentsResponse + ) -> document_service.ListDocumentsResponse: + """Post-rpc interceptor for list_documents + + Override in a subclass to manipulate the response + after it is returned by the DocumentService server but before + it is returned to user code. + """ + return response + def pre_update_dataset( self, request: document_service.UpdateDatasetRequest, @@ -906,6 +937,101 @@ def __call__( resp = self._interceptor.post_import_documents(resp) return resp + class _ListDocuments(DocumentServiceRestStub): + def __hash__(self): + return hash("ListDocuments") + + __REQUIRED_FIELDS_DEFAULT_VALUES: Dict[str, Any] = {} + + @classmethod + def _get_unset_required_fields(cls, message_dict): + return { + k: v + for k, v in cls.__REQUIRED_FIELDS_DEFAULT_VALUES.items() + if k not in message_dict + } + + def __call__( + self, + request: document_service.ListDocumentsRequest, + *, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: Optional[float] = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> document_service.ListDocumentsResponse: + r"""Call the list documents method over HTTP. + + Args: + request (~.document_service.ListDocumentsRequest): + The request object. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. 
+ + Returns: + ~.document_service.ListDocumentsResponse: + + """ + + http_options: List[Dict[str, str]] = [ + { + "method": "post", + "uri": "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:listDocuments", + "body": "*", + }, + ] + request, metadata = self._interceptor.pre_list_documents(request, metadata) + pb_request = document_service.ListDocumentsRequest.pb(request) + transcoded_request = path_template.transcode(http_options, pb_request) + + # Jsonify the request body + + body = json_format.MessageToJson( + transcoded_request["body"], + including_default_value_fields=False, + use_integers_for_enums=True, + ) + uri = transcoded_request["uri"] + method = transcoded_request["method"] + + # Jsonify the query params + query_params = json.loads( + json_format.MessageToJson( + transcoded_request["query_params"], + including_default_value_fields=False, + use_integers_for_enums=True, + ) + ) + query_params.update(self._get_unset_required_fields(query_params)) + + query_params["$alt"] = "json;enum-encoding=int" + + # Send the request + headers = dict(metadata) + headers["Content-Type"] = "application/json" + response = getattr(self._session, method)( + "{host}{uri}".format(host=self._host, uri=uri), + timeout=timeout, + headers=headers, + params=rest_helpers.flatten_query_params(query_params, strict=True), + data=body, + ) + + # In case of error, raise the appropriate core_exceptions.GoogleAPICallError exception + # subclass. + if response.status_code >= 400: + raise core_exceptions.from_http_response(response) + + # Return the response + resp = document_service.ListDocumentsResponse() + pb_resp = document_service.ListDocumentsResponse.pb(resp) + + json_format.Parse(response.content, pb_resp, ignore_unknown_fields=True) + resp = self._interceptor.post_list_documents(resp) + return resp + class _UpdateDataset(DocumentServiceRestStub): def __hash__(self): return hash("UpdateDataset") @@ -1135,6 +1261,16 @@ def import_documents( # In C++ this would require a dynamic_cast return self._ImportDocuments(self._session, self._host, self._interceptor) # type: ignore + @property + def list_documents( + self, + ) -> Callable[ + [document_service.ListDocumentsRequest], document_service.ListDocumentsResponse + ]: + # The return type is fine, but mypy isn't sophisticated enough to determine what's going on here. 
+ # In C++ this would require a dynamic_cast + return self._ListDocuments(self._session, self._host, self._interceptor) # type: ignore + @property def update_dataset( self, diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/__init__.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/__init__.py index 849ba2382a19..efe88aa57617 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/__init__.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/__init__.py @@ -80,12 +80,20 @@ UndeployProcessorVersionRequest, UndeployProcessorVersionResponse, ) -from .document_schema import DocumentSchema, EntityTypeMetadata, PropertyMetadata +from .document_schema import ( + DocumentSchema, + EntityTypeMetadata, + FieldExtractionMetadata, + PropertyMetadata, + SummaryOptions, +) from .document_service import ( BatchDeleteDocumentsMetadata, BatchDeleteDocumentsRequest, BatchDeleteDocumentsResponse, DatasetSplitType, + DocumentLabelingState, + DocumentMetadata, DocumentPageRange, GetDatasetSchemaRequest, GetDocumentRequest, @@ -93,6 +101,8 @@ ImportDocumentsMetadata, ImportDocumentsRequest, ImportDocumentsResponse, + ListDocumentsRequest, + ListDocumentsResponse, UpdateDatasetOperationMetadata, UpdateDatasetRequest, UpdateDatasetSchemaRequest, @@ -100,7 +110,7 @@ from .evaluation import Evaluation, EvaluationReference from .geometry import BoundingPoly, NormalizedVertex, Vertex from .operation_metadata import CommonOperationMetadata -from .processor import Processor, ProcessorVersion +from .processor import Processor, ProcessorVersion, ProcessorVersionAlias from .processor_type import ProcessorType __all__ = ( @@ -173,10 +183,13 @@ "UndeployProcessorVersionResponse", "DocumentSchema", "EntityTypeMetadata", + "FieldExtractionMetadata", "PropertyMetadata", + "SummaryOptions", "BatchDeleteDocumentsMetadata", "BatchDeleteDocumentsRequest", "BatchDeleteDocumentsResponse", + "DocumentMetadata", "DocumentPageRange", "GetDatasetSchemaRequest", "GetDocumentRequest", @@ -184,10 +197,13 @@ "ImportDocumentsMetadata", "ImportDocumentsRequest", "ImportDocumentsResponse", + "ListDocumentsRequest", + "ListDocumentsResponse", "UpdateDatasetOperationMetadata", "UpdateDatasetRequest", "UpdateDatasetSchemaRequest", "DatasetSplitType", + "DocumentLabelingState", "Evaluation", "EvaluationReference", "BoundingPoly", @@ -196,5 +212,6 @@ "CommonOperationMetadata", "Processor", "ProcessorVersion", + "ProcessorVersionAlias", "ProcessorType", ) diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py index 35ed74717b3c..8fb22a98f97d 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py @@ -1902,7 +1902,8 @@ class RevisionCase(proto.Enum): Values: REVISION_CASE_UNSPECIFIED (0): - Unspecified case, fallback to read the LATEST_HUMAN_REVIEW. + Unspecified case, fall back to read the + ``LATEST_HUMAN_REVIEW``. LATEST_HUMAN_REVIEW (1): The latest revision made by a human. 
LATEST_TIMESTAMP (2): diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_io.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_io.py index 73cfc30d986a..25ca2116723e 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_io.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_io.py @@ -44,6 +44,12 @@ class RawDocument(proto.Message): An IANA MIME type (RFC6838) indicating the nature and format of the [content][google.cloud.documentai.v1beta3.RawDocument.content]. + display_name (str): + The display name of the document, it supports all Unicode + characters except the following: ``*``, ``?``, ``[``, ``]``, + ``%``, ``{``, ``}``,\ ``'``, ``\"``, ``,`` ``~``, ``=`` and + ``:`` are reserved. If not specified, a default ID is + generated. """ content: bytes = proto.Field( @@ -54,6 +60,10 @@ class RawDocument(proto.Message): proto.STRING, number=2, ) + display_name: str = proto.Field( + proto.STRING, + number=3, + ) class GcsDocument(proto.Message): @@ -257,7 +267,7 @@ class OcrConfig(proto.Message): disable_character_boxes_detection (bool): Turn off character box detector in OCR engine. Character box detection is enabled by - default in OCR 2.0+ processors. + default in OCR 2.0 (and later) processors. premium_features (google.cloud.documentai_v1beta3.types.OcrConfig.PremiumFeatures): Configurations for premium OCR features. """ @@ -289,7 +299,8 @@ class PremiumFeatures(proto.Message): Attributes: enable_selection_mark_detection (bool): Turn on selection mark detector in OCR - engine. Only available in OCR 2.0+ processors. + engine. Only available in OCR 2.0 (and later) + processors. compute_style_info (bool): Turn on font identification model and return font style information. diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_processor_service.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_processor_service.py index 17f51ac9bec9..637808172eba 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_processor_service.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_processor_service.py @@ -106,8 +106,8 @@ class ProcessOptions(proto.Message): This field is a member of `oneof`_ ``page_range``. from_start (int): - Only process certain pages from the start, - process all if the document has less pages. + Only process certain pages from the start. + Process all if the document has fewer pages. This field is a member of `oneof`_ ``page_range``. from_end (int): @@ -118,6 +118,13 @@ class ProcessOptions(proto.Message): ocr_config (google.cloud.documentai_v1beta3.types.OcrConfig): Only applicable to ``OCR_PROCESSOR``. Returns error if set on other processor types. + schema_override (google.cloud.documentai_v1beta3.types.DocumentSchema): + Optional. Override the schema of the + [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. + Will return an Invalid Argument error if this field is set + when the underlying + [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] + doesn't support schema override. 
""" class IndividualPageSelector(proto.Message): @@ -155,6 +162,11 @@ class IndividualPageSelector(proto.Message): number=1, message=document_io.OcrConfig, ) + schema_override: gcd_document_schema.DocumentSchema = proto.Field( + proto.MESSAGE, + number=8, + message=gcd_document_schema.DocumentSchema, + ) class ProcessRequest(proto.Message): @@ -1268,8 +1280,8 @@ class CustomDocumentExtractionOptions(proto.Message): """ class TrainingMethod(proto.Enum): - r"""Training Method for CDE. TRAINING_METHOD_UNSPECIFIED will fallback - to MODEL_BASED. + r"""Training Method for CDE. ``TRAINING_METHOD_UNSPECIFIED`` will fall + back to ``MODEL_BASED``. Values: TRAINING_METHOD_UNSPECIFIED (0): @@ -1747,19 +1759,20 @@ def raw_page(self): class ImportProcessorVersionRequest(proto.Message): r"""The request message for the [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] - method. Requirements: + method. - - The Document AI `Service - Agent `__ of - the destination project must have `Document AI Editor - role `__ - on the source project. + The Document AI `Service + Agent `__ of the + destination project must have `Document AI Editor + role `__ + on the source project. The destination project is specified as part of the [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent] field. The source project is specified as part of the - [source][ImportProcessorVersionRequest.processor_version_source or - ImportProcessorVersionRequest.external_processor_version_source] + [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source] + or + [external_processor_version_source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.external_processor_version_source] field. This message has `oneof`_ fields (mutually exclusive fields). @@ -1778,9 +1791,9 @@ class ImportProcessorVersionRequest(proto.Message): This field is a member of `oneof`_ ``source``. external_processor_version_source (google.cloud.documentai_v1beta3.types.ImportProcessorVersionRequest.ExternalProcessorVersionSource): - The source processor version to import from, - and can be from different environment and region - than the destination processor. + The source processor version to import from. + It can be from a different environment and + region than the destination processor. This field is a member of `oneof`_ ``source``. parent (str): diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_schema.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_schema.py index 161929ac72b4..2b177b922f1c 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_schema.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_schema.py @@ -22,6 +22,8 @@ __protobuf__ = proto.module( package="google.cloud.documentai.v1beta3", manifest={ + "SummaryOptions", + "FieldExtractionMetadata", "PropertyMetadata", "EntityTypeMetadata", "DocumentSchema", @@ -29,6 +31,76 @@ ) +class SummaryOptions(proto.Message): + r"""Metadata for document summarization. + + Attributes: + length (google.cloud.documentai_v1beta3.types.SummaryOptions.Length): + How long the summary should be. + format_ (google.cloud.documentai_v1beta3.types.SummaryOptions.Format): + The format the summary should be in. + """ + + class Length(proto.Enum): + r"""The Length enum. + + Values: + LENGTH_UNSPECIFIED (0): + Default. 
+ BRIEF (1): + A brief summary of one or two sentences. + MODERATE (2): + A paragraph-length summary. + COMPREHENSIVE (3): + The longest option available. + """ + LENGTH_UNSPECIFIED = 0 + BRIEF = 1 + MODERATE = 2 + COMPREHENSIVE = 3 + + class Format(proto.Enum): + r"""The Format enum. + + Values: + FORMAT_UNSPECIFIED (0): + Default. + PARAGRAPH (1): + Format the output in paragraphs. + BULLETS (2): + Format the output in bullets. + """ + FORMAT_UNSPECIFIED = 0 + PARAGRAPH = 1 + BULLETS = 2 + + length: Length = proto.Field( + proto.ENUM, + number=1, + enum=Length, + ) + format_: Format = proto.Field( + proto.ENUM, + number=2, + enum=Format, + ) + + +class FieldExtractionMetadata(proto.Message): + r"""Metadata for how this field value is extracted. + + Attributes: + summary_options (google.cloud.documentai_v1beta3.types.SummaryOptions): + Summary options config. + """ + + summary_options: "SummaryOptions" = proto.Field( + proto.MESSAGE, + number=2, + message="SummaryOptions", + ) + + class PropertyMetadata(proto.Message): r"""Metadata about a property. @@ -36,12 +108,19 @@ class PropertyMetadata(proto.Message): inactive (bool): Whether the property should be considered as "inactive". + field_extraction_metadata (google.cloud.documentai_v1beta3.types.FieldExtractionMetadata): + Field extraction metadata on the property. """ inactive: bool = proto.Field( proto.BOOL, number=3, ) + field_extraction_metadata: "FieldExtractionMetadata" = proto.Field( + proto.MESSAGE, + number=9, + message="FieldExtractionMetadata", + ) class EntityTypeMetadata(proto.Message): diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_service.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_service.py index 4dd9ee5b8013..f3f6445be1b1 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_service.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_service.py @@ -29,6 +29,7 @@ package="google.cloud.documentai.v1beta3", manifest={ "DatasetSplitType", + "DocumentLabelingState", "UpdateDatasetRequest", "UpdateDatasetOperationMetadata", "ImportDocumentsRequest", @@ -36,12 +37,15 @@ "ImportDocumentsMetadata", "GetDocumentRequest", "GetDocumentResponse", + "ListDocumentsRequest", + "ListDocumentsResponse", "BatchDeleteDocumentsRequest", "BatchDeleteDocumentsResponse", "BatchDeleteDocumentsMetadata", "GetDatasetSchemaRequest", "UpdateDatasetSchemaRequest", "DocumentPageRange", + "DocumentMetadata", }, ) @@ -53,7 +57,6 @@ class DatasetSplitType(proto.Enum): Values: DATASET_SPLIT_TYPE_UNSPECIFIED (0): Default value if the enum is not set. - go/protodosdonts#do-include-an-unspecified-value-in-an-enum DATASET_SPLIT_TRAIN (1): Identifies the train documents. DATASET_SPLIT_TEST (2): @@ -67,6 +70,25 @@ class DatasetSplitType(proto.Enum): DATASET_SPLIT_UNASSIGNED = 3 +class DocumentLabelingState(proto.Enum): + r"""Describes the labelling status of a document. + + Values: + DOCUMENT_LABELING_STATE_UNSPECIFIED (0): + Default value if the enum is not set. + DOCUMENT_LABELED (1): + Document has been labelled. + DOCUMENT_UNLABELED (2): + Document has not been labelled. + DOCUMENT_AUTO_LABELED (3): + Document has been auto-labelled. 
+ """ + DOCUMENT_LABELING_STATE_UNSPECIFIED = 0 + DOCUMENT_LABELED = 1 + DOCUMENT_UNLABELED = 2 + DOCUMENT_AUTO_LABELED = 3 + + class UpdateDatasetRequest(proto.Message): r""" @@ -95,7 +117,7 @@ class UpdateDatasetOperationMetadata(proto.Message): Attributes: common_metadata (google.cloud.documentai_v1beta3.types.CommonOperationMetadata): - The basic metadata of the long running + The basic metadata of the long-running operation. """ @@ -201,7 +223,7 @@ class ImportDocumentsMetadata(proto.Message): Attributes: common_metadata (google.cloud.documentai_v1beta3.types.CommonOperationMetadata): - The basic metadata of the long running + The basic metadata of the long-running operation. individual_import_statuses (MutableSequence[google.cloud.documentai_v1beta3.types.ImportDocumentsMetadata.IndividualImportStatus]): The list of response details of each @@ -243,9 +265,10 @@ class IndividualImportStatus(proto.Message): ) class ImportConfigValidationResult(proto.Message): - r"""The validation status of each import config. Status is set to errors - if there is no documents to import in the import_config, or OK if - the operation will try to proceed at least one document. + r"""The validation status of each import config. Status is set to an + error if there are no documents to import in the ``import_config``, + or ``OK`` if the operation will try to proceed with at least one + document. Attributes: input_gcs_source (str): @@ -346,6 +369,130 @@ class GetDocumentResponse(proto.Message): ) +class ListDocumentsRequest(proto.Message): + r""" + + Attributes: + dataset (str): + Required. The resource name of the dataset to + be listed. Format: + + projects/{project}/locations/{location}/processors/{processor}/dataset + page_size (int): + The maximum number of documents to return. + The service may return fewer than this value. If + unspecified, at most 20 documents will be + returned. The maximum value is 100; values above + 100 will be coerced to 100. + page_token (str): + A page token, received from a previous ``ListDocuments`` + call. Provide this to retrieve the subsequent page. + + When paginating, all other parameters provided to + ``ListDocuments`` must match the call that provided the page + token. + filter (str): + Optional. Query to filter the documents based on + https://google.aip.dev/160. + + Currently support query strings are: + + - ``SplitType=DATASET_SPLIT_TEST|DATASET_SPLIT_TRAIN|DATASET_SPLIT_UNASSIGNED`` + - ``LabelingState=DOCUMENT_LABELED|DOCUMENT_UNLABELED|DOCUMENT_AUTO_LABELED`` + - ``DisplayName=\"file_name.pdf\"`` + - ``EntityType=abc/def`` + - ``TagName=\"auto-labeling-running\"|\"sampled\"`` + + Note: + + - Only ``AND``, ``=`` and ``!=`` are supported. e.g. + ``DisplayName=file_name AND EntityType!=abc`` IS + supported. + - Wildcard ``*`` is supported only in ``DisplayName`` + filter + - No duplicate filter keys are allowed, e.g. + ``EntityType=a AND EntityType=b`` is NOT supported. + - String match is case sensitive (for filter + ``DisplayName`` & ``EntityType``). + return_total_size (bool): + Optional. Controls if the ListDocuments request requires a + total size of matched documents. See + ListDocumentsResponse.total_size. + + Enabling this flag may adversely impact performance. + + Defaults to false. + skip (int): + Optional. Number of results to skip beginning from the + ``page_token`` if provided. + https://google.aip.dev/158#skipping-results. It must be a + non-negative integer. Negative values wil be rejected. Note + that this is not the number of pages to skip. 
If this value + causes the cursor to move past the end of results, + ``ListDocumentsResponse.document_metadata`` and + ``ListDocumentsResponse.next_page_token`` will be empty. + """ + + dataset: str = proto.Field( + proto.STRING, + number=1, + ) + page_size: int = proto.Field( + proto.INT32, + number=2, + ) + page_token: str = proto.Field( + proto.STRING, + number=3, + ) + filter: str = proto.Field( + proto.STRING, + number=4, + ) + return_total_size: bool = proto.Field( + proto.BOOL, + number=6, + ) + skip: int = proto.Field( + proto.INT32, + number=8, + ) + + +class ListDocumentsResponse(proto.Message): + r""" + + Attributes: + document_metadata (MutableSequence[google.cloud.documentai_v1beta3.types.DocumentMetadata]): + Document metadata corresponding to the listed + documents. + next_page_token (str): + A token, which can be sent as ``page_token`` to retrieve the + next page. If this field is omitted, there are no subsequent + pages. + total_size (int): + Total count of documents queried. + """ + + @property + def raw_page(self): + return self + + document_metadata: MutableSequence["DocumentMetadata"] = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="DocumentMetadata", + ) + next_page_token: str = proto.Field( + proto.STRING, + number=2, + ) + total_size: int = proto.Field( + proto.INT32, + number=3, + ) + + class BatchDeleteDocumentsRequest(proto.Message): r""" @@ -383,7 +530,7 @@ class BatchDeleteDocumentsMetadata(proto.Message): Attributes: common_metadata (google.cloud.documentai_v1beta3.types.CommonOperationMetadata): - The basic metadata of the long running + The basic metadata of the long-running operation. individual_batch_delete_statuses (MutableSequence[google.cloud.documentai_v1beta3.types.BatchDeleteDocumentsMetadata.IndividualBatchDeleteStatus]): The list of response details of each @@ -510,4 +657,46 @@ class DocumentPageRange(proto.Message): ) +class DocumentMetadata(proto.Message): + r"""Metadata about a document. + + Attributes: + document_id (google.cloud.documentai_v1beta3.types.DocumentId): + Document identifier. + page_count (int): + Number of pages in the document. + dataset_type (google.cloud.documentai_v1beta3.types.DatasetSplitType): + Type of the dataset split to which the + document belongs. + labeling_state (google.cloud.documentai_v1beta3.types.DocumentLabelingState): + Labelling state of the document. + display_name (str): + The display name of the document. 
+ """ + + document_id: gcd_dataset.DocumentId = proto.Field( + proto.MESSAGE, + number=1, + message=gcd_dataset.DocumentId, + ) + page_count: int = proto.Field( + proto.INT32, + number=2, + ) + dataset_type: "DatasetSplitType" = proto.Field( + proto.ENUM, + number=3, + enum="DatasetSplitType", + ) + labeling_state: "DocumentLabelingState" = proto.Field( + proto.ENUM, + number=5, + enum="DocumentLabelingState", + ) + display_name: str = proto.Field( + proto.STRING, + number=6, + ) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/processor.py b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/processor.py index cccf96a174a1..f9610c349925 100644 --- a/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/processor.py +++ b/packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/processor.py @@ -27,6 +27,7 @@ package="google.cloud.documentai.v1beta3", manifest={ "ProcessorVersion", + "ProcessorVersionAlias", "Processor", }, ) @@ -175,6 +176,29 @@ class DeprecationInfo(proto.Message): ) +class ProcessorVersionAlias(proto.Message): + r"""Contains the alias and the aliased resource name of a processor + version. + + Attributes: + alias (str): + The alias in the form of ``processor_version`` resource + name. + processor_version (str): + The resource name of the aliased processor + version. + """ + + alias: str = proto.Field( + proto.STRING, + number=1, + ) + processor_version: str = proto.Field( + proto.STRING, + number=2, + ) + + class Processor(proto.Message): r"""The first-class citizen for Document AI. Each processor defines how to extract structural information from a document. @@ -194,6 +218,8 @@ class Processor(proto.Message): Output only. The state of the processor. default_processor_version (str): The default processor version. + processor_version_aliases (MutableSequence[google.cloud.documentai_v1beta3.types.ProcessorVersionAlias]): + Output only. The processor version aliases. process_endpoint (str): Output only. Immutable. The http endpoint that can be called to invoke processing. @@ -271,6 +297,13 @@ class State(proto.Enum): proto.STRING, number=9, ) + processor_version_aliases: MutableSequence[ + "ProcessorVersionAlias" + ] = proto.RepeatedField( + proto.MESSAGE, + number=10, + message="ProcessorVersionAlias", + ) process_endpoint: str = proto.Field( proto.STRING, number=6, diff --git a/packages/google-cloud-documentai/noxfile.py b/packages/google-cloud-documentai/noxfile.py index 9a2acd8b6787..be54712bfa8f 100644 --- a/packages/google-cloud-documentai/noxfile.py +++ b/packages/google-cloud-documentai/noxfile.py @@ -46,7 +46,7 @@ UNIT_TEST_EXTRAS = [] UNIT_TEST_EXTRAS_BY_PYTHON = {} -SYSTEM_TEST_PYTHON_VERSIONS = [] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "mock", "pytest", @@ -405,24 +405,3 @@ def prerelease_deps(session): session.run("python", "-c", "import google.auth; print(google.auth.__version__)") session.run("py.test", "tests/unit") - - system_test_path = os.path.join("tests", "system.py") - system_test_folder_path = os.path.join("tests", "system") - - # Only run system tests if found. 
- if os.path.exists(system_test_path): - session.run( - "py.test", - "--verbose", - f"--junitxml=system_{session.python}_sponge_log.xml", - system_test_path, - *session.posargs, - ) - if os.path.exists(system_test_folder_path): - session.run( - "py.test", - "--verbose", - f"--junitxml=system_{session.python}_sponge_log.xml", - system_test_folder_path, - *session.posargs, - ) diff --git a/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_async.py b/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_async.py new file mode 100644 index 000000000000..c1cecf8cbcb4 --- /dev/null +++ b/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_async.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Generated code. DO NOT EDIT! +# +# Snippet for ListDocuments +# NOTE: This snippet has been automatically generated for illustrative purposes only. +# It may require modifications to work in your environment. + +# To install the latest published package dependency, execute the following: +# python3 -m pip install google-cloud-documentai + + +# [START documentai_v1beta3_generated_DocumentService_ListDocuments_async] +# This snippet has been automatically generated and should be regarded as a +# code template only. +# It will require modifications to work: +# - It may require correct/in-range values for request initialization. +# - It may require specifying regional endpoints when creating the service +# client as shown in: +# https://googleapis.dev/python/google-api-core/latest/client_options.html +from google.cloud import documentai_v1beta3 + + +async def sample_list_documents(): + # Create a client + client = documentai_v1beta3.DocumentServiceAsyncClient() + + # Initialize request argument(s) + request = documentai_v1beta3.ListDocumentsRequest( + dataset="dataset_value", + ) + + # Make the request + page_result = client.list_documents(request=request) + + # Handle the response + async for response in page_result: + print(response) + +# [END documentai_v1beta3_generated_DocumentService_ListDocuments_async] diff --git a/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_sync.py b/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_sync.py new file mode 100644 index 000000000000..2ff442f74696 --- /dev/null +++ b/packages/google-cloud-documentai/samples/generated_samples/documentai_v1beta3_generated_document_service_list_documents_sync.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Generated code. DO NOT EDIT! +# +# Snippet for ListDocuments +# NOTE: This snippet has been automatically generated for illustrative purposes only. +# It may require modifications to work in your environment. + +# To install the latest published package dependency, execute the following: +# python3 -m pip install google-cloud-documentai + + +# [START documentai_v1beta3_generated_DocumentService_ListDocuments_sync] +# This snippet has been automatically generated and should be regarded as a +# code template only. +# It will require modifications to work: +# - It may require correct/in-range values for request initialization. +# - It may require specifying regional endpoints when creating the service +# client as shown in: +# https://googleapis.dev/python/google-api-core/latest/client_options.html +from google.cloud import documentai_v1beta3 + + +def sample_list_documents(): + # Create a client + client = documentai_v1beta3.DocumentServiceClient() + + # Initialize request argument(s) + request = documentai_v1beta3.ListDocumentsRequest( + dataset="dataset_value", + ) + + # Make the request + page_result = client.list_documents(request=request) + + # Handle the response + for response in page_result: + print(response) + +# [END documentai_v1beta3_generated_DocumentService_ListDocuments_sync] diff --git a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1.json b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1.json index 12d7ad027832..96d60af285a2 100644 --- a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1.json +++ b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-documentai", - "version": "2.19.0" + "version": "0.1.0" }, "snippets": [ { diff --git a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta2.json b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta2.json index b92e5ed066c0..ef56bd7e3eb3 100644 --- a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta2.json +++ b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta2.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-documentai", - "version": "2.19.0" + "version": "0.1.0" }, "snippets": [ { diff --git a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta3.json b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta3.json index 16c3bacb6e0f..f47545a8ed3e 100644 --- a/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta3.json +++ b/packages/google-cloud-documentai/samples/generated_samples/snippet_metadata_google.cloud.documentai.v1beta3.json @@ -8,7 
+8,7 @@ ], "language": "PYTHON", "name": "google-cloud-documentai", - "version": "2.19.0" + "version": "0.1.0" }, "snippets": [ { @@ -4350,6 +4350,167 @@ ], "title": "documentai_v1beta3_generated_document_service_import_documents_sync.py" }, + { + "canonical": true, + "clientMethod": { + "async": true, + "client": { + "fullName": "google.cloud.documentai_v1beta3.DocumentServiceAsyncClient", + "shortName": "DocumentServiceAsyncClient" + }, + "fullName": "google.cloud.documentai_v1beta3.DocumentServiceAsyncClient.list_documents", + "method": { + "fullName": "google.cloud.documentai.v1beta3.DocumentService.ListDocuments", + "service": { + "fullName": "google.cloud.documentai.v1beta3.DocumentService", + "shortName": "DocumentService" + }, + "shortName": "ListDocuments" + }, + "parameters": [ + { + "name": "request", + "type": "google.cloud.documentai_v1beta3.types.ListDocumentsRequest" + }, + { + "name": "dataset", + "type": "str" + }, + { + "name": "retry", + "type": "google.api_core.retry.Retry" + }, + { + "name": "timeout", + "type": "float" + }, + { + "name": "metadata", + "type": "Sequence[Tuple[str, str]" + } + ], + "resultType": "google.cloud.documentai_v1beta3.services.document_service.pagers.ListDocumentsAsyncPager", + "shortName": "list_documents" + }, + "description": "Sample for ListDocuments", + "file": "documentai_v1beta3_generated_document_service_list_documents_async.py", + "language": "PYTHON", + "origin": "API_DEFINITION", + "regionTag": "documentai_v1beta3_generated_DocumentService_ListDocuments_async", + "segments": [ + { + "end": 52, + "start": 27, + "type": "FULL" + }, + { + "end": 52, + "start": 27, + "type": "SHORT" + }, + { + "end": 40, + "start": 38, + "type": "CLIENT_INITIALIZATION" + }, + { + "end": 45, + "start": 41, + "type": "REQUEST_INITIALIZATION" + }, + { + "end": 48, + "start": 46, + "type": "REQUEST_EXECUTION" + }, + { + "end": 53, + "start": 49, + "type": "RESPONSE_HANDLING" + } + ], + "title": "documentai_v1beta3_generated_document_service_list_documents_async.py" + }, + { + "canonical": true, + "clientMethod": { + "client": { + "fullName": "google.cloud.documentai_v1beta3.DocumentServiceClient", + "shortName": "DocumentServiceClient" + }, + "fullName": "google.cloud.documentai_v1beta3.DocumentServiceClient.list_documents", + "method": { + "fullName": "google.cloud.documentai.v1beta3.DocumentService.ListDocuments", + "service": { + "fullName": "google.cloud.documentai.v1beta3.DocumentService", + "shortName": "DocumentService" + }, + "shortName": "ListDocuments" + }, + "parameters": [ + { + "name": "request", + "type": "google.cloud.documentai_v1beta3.types.ListDocumentsRequest" + }, + { + "name": "dataset", + "type": "str" + }, + { + "name": "retry", + "type": "google.api_core.retry.Retry" + }, + { + "name": "timeout", + "type": "float" + }, + { + "name": "metadata", + "type": "Sequence[Tuple[str, str]" + } + ], + "resultType": "google.cloud.documentai_v1beta3.services.document_service.pagers.ListDocumentsPager", + "shortName": "list_documents" + }, + "description": "Sample for ListDocuments", + "file": "documentai_v1beta3_generated_document_service_list_documents_sync.py", + "language": "PYTHON", + "origin": "API_DEFINITION", + "regionTag": "documentai_v1beta3_generated_DocumentService_ListDocuments_sync", + "segments": [ + { + "end": 52, + "start": 27, + "type": "FULL" + }, + { + "end": 52, + "start": 27, + "type": "SHORT" + }, + { + "end": 40, + "start": 38, + "type": "CLIENT_INITIALIZATION" + }, + { + "end": 45, + "start": 41, + "type": 
"REQUEST_INITIALIZATION" + }, + { + "end": 48, + "start": 46, + "type": "REQUEST_EXECUTION" + }, + { + "end": 53, + "start": 49, + "type": "RESPONSE_HANDLING" + } + ], + "title": "documentai_v1beta3_generated_document_service_list_documents_sync.py" + }, { "canonical": true, "clientMethod": { diff --git a/packages/google-cloud-documentai/scripts/fixup_documentai_v1beta3_keywords.py b/packages/google-cloud-documentai/scripts/fixup_documentai_v1beta3_keywords.py index 432d626bc4d5..40bd65af9ae3 100644 --- a/packages/google-cloud-documentai/scripts/fixup_documentai_v1beta3_keywords.py +++ b/packages/google-cloud-documentai/scripts/fixup_documentai_v1beta3_keywords.py @@ -57,6 +57,7 @@ class documentaiCallTransformer(cst.CSTTransformer): 'get_processor_version': ('name', ), 'import_documents': ('dataset', 'batch_documents_import_configs', ), 'import_processor_version': ('parent', 'processor_version_source', 'external_processor_version_source', ), + 'list_documents': ('dataset', 'page_size', 'page_token', 'filter', 'return_total_size', 'skip', ), 'list_evaluations': ('parent', 'page_size', 'page_token', ), 'list_processors': ('parent', 'page_size', 'page_token', ), 'list_processor_types': ('parent', 'page_size', 'page_token', ), diff --git a/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py b/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py index 9187c565e35f..48571b0c2731 100644 --- a/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py +++ b/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py @@ -10738,6 +10738,9 @@ def test_create_processor_rest(request_type): "display_name": "display_name_value", "state": 1, "default_processor_version": "default_processor_version_value", + "processor_version_aliases": [ + {"alias": "alias_value", "processor_version": "processor_version_value"} + ], "process_endpoint": "process_endpoint_value", "create_time": {"seconds": 751, "nanos": 543}, "kms_key_name": "kms_key_name_value", @@ -10947,6 +10950,9 @@ def test_create_processor_rest_bad_request( "display_name": "display_name_value", "state": 1, "default_processor_version": "default_processor_version_value", + "processor_version_aliases": [ + {"alias": "alias_value", "processor_version": "processor_version_value"} + ], "process_endpoint": "process_endpoint_value", "create_time": {"seconds": 751, "nanos": 543}, "kms_key_name": "kms_key_name_value", diff --git a/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_service.py b/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_service.py index d59cded4c52b..3496b9be09e7 100644 --- a/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_service.py +++ b/packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_service.py @@ -57,6 +57,7 @@ from google.cloud.documentai_v1beta3.services.document_service import ( DocumentServiceAsyncClient, DocumentServiceClient, + pagers, transports, ) from google.cloud.documentai_v1beta3.types import ( @@ -1465,6 +1466,432 @@ async def test_get_document_flattened_error_async(): ) +@pytest.mark.parametrize( + "request_type", + [ + document_service.ListDocumentsRequest, + dict, + ], +) +def test_list_documents(request_type, transport: str = "grpc"): + client = DocumentServiceClient( + 
credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = document_service.ListDocumentsResponse( + next_page_token="next_page_token_value", + total_size=1086, + ) + response = client.list_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == document_service.ListDocumentsRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, pagers.ListDocumentsPager) + assert response.next_page_token == "next_page_token_value" + assert response.total_size == 1086 + + +def test_list_documents_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + client.list_documents() + call.assert_called() + _, args, _ = call.mock_calls[0] + assert args[0] == document_service.ListDocumentsRequest() + + +@pytest.mark.asyncio +async def test_list_documents_async( + transport: str = "grpc_asyncio", request_type=document_service.ListDocumentsRequest +): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_service.ListDocumentsResponse( + next_page_token="next_page_token_value", + total_size=1086, + ) + ) + response = await client.list_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == document_service.ListDocumentsRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, pagers.ListDocumentsAsyncPager) + assert response.next_page_token == "next_page_token_value" + assert response.total_size == 1086 + + +@pytest.mark.asyncio +async def test_list_documents_async_from_dict(): + await test_list_documents_async(request_type=dict) + + +def test_list_documents_field_headers(): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_service.ListDocumentsRequest() + + request.dataset = "dataset_value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + call.return_value = document_service.ListDocumentsResponse() + client.list_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "dataset=dataset_value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_list_documents_field_headers_async(): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_service.ListDocumentsRequest() + + request.dataset = "dataset_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_service.ListDocumentsResponse() + ) + await client.list_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "dataset=dataset_value", + ) in kw["metadata"] + + +def test_list_documents_flattened(): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = document_service.ListDocumentsResponse() + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.list_documents( + dataset="dataset_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + arg = args[0].dataset + mock_val = "dataset_value" + assert arg == mock_val + + +def test_list_documents_flattened_error(): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.list_documents( + document_service.ListDocumentsRequest(), + dataset="dataset_value", + ) + + +@pytest.mark.asyncio +async def test_list_documents_flattened_async(): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = document_service.ListDocumentsResponse() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_service.ListDocumentsResponse() + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.list_documents( + dataset="dataset_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + arg = args[0].dataset + mock_val = "dataset_value" + assert arg == mock_val + + +@pytest.mark.asyncio +async def test_list_documents_flattened_error_async(): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.list_documents( + document_service.ListDocumentsRequest(), + dataset="dataset_value", + ) + + +def test_list_documents_pager(transport_name: str = "grpc"): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials, + transport=transport_name, + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Set the response to a series of pages. + call.side_effect = ( + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + next_page_token="abc", + ), + document_service.ListDocumentsResponse( + document_metadata=[], + next_page_token="def", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + ], + next_page_token="ghi", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + ), + RuntimeError, + ) + + metadata = () + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("dataset", ""),)), + ) + pager = client.list_documents(request={}) + + assert pager._metadata == metadata + + results = list(pager) + assert len(results) == 6 + assert all(isinstance(i, document_service.DocumentMetadata) for i in results) + + +def test_list_documents_pages(transport_name: str = "grpc"): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials, + transport=transport_name, + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.list_documents), "__call__") as call: + # Set the response to a series of pages. + call.side_effect = ( + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + next_page_token="abc", + ), + document_service.ListDocumentsResponse( + document_metadata=[], + next_page_token="def", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + ], + next_page_token="ghi", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + ), + RuntimeError, + ) + pages = list(client.list_documents(request={}).pages) + for page_, token in zip(pages, ["abc", "def", "ghi", ""]): + assert page_.raw_page.next_page_token == token + + +@pytest.mark.asyncio +async def test_list_documents_async_pager(): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials, + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.list_documents), "__call__", new_callable=mock.AsyncMock + ) as call: + # Set the response to a series of pages. 
+ call.side_effect = ( + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + next_page_token="abc", + ), + document_service.ListDocumentsResponse( + document_metadata=[], + next_page_token="def", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + ], + next_page_token="ghi", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + ), + RuntimeError, + ) + async_pager = await client.list_documents( + request={}, + ) + assert async_pager.next_page_token == "abc" + responses = [] + async for response in async_pager: # pragma: no branch + responses.append(response) + + assert len(responses) == 6 + assert all(isinstance(i, document_service.DocumentMetadata) for i in responses) + + +@pytest.mark.asyncio +async def test_list_documents_async_pages(): + client = DocumentServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials, + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.list_documents), "__call__", new_callable=mock.AsyncMock + ) as call: + # Set the response to a series of pages. + call.side_effect = ( + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + next_page_token="abc", + ), + document_service.ListDocumentsResponse( + document_metadata=[], + next_page_token="def", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + ], + next_page_token="ghi", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + ), + RuntimeError, + ) + pages = [] + # Workaround issue in python 3.9 related to code coverage by adding `# pragma: no branch` + # See https://github.com/googleapis/gapic-generator-python/pull/1174#issuecomment-1025132372 + async for page_ in ( # pragma: no branch + await client.list_documents(request={}) + ).pages: + pages.append(page_) + for page_, token in zip(pages, ["abc", "def", "ghi", ""]): + assert page_.raw_page.next_page_token == token + + @pytest.mark.parametrize( "request_type", [ @@ -3094,6 +3521,342 @@ def test_get_document_rest_error(): ) +@pytest.mark.parametrize( + "request_type", + [ + document_service.ListDocumentsRequest, + dict, + ], +) +def test_list_documents_rest(request_type): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="rest", + ) + + # send a request that will satisfy transcoding + request_init = { + "dataset": "projects/sample1/locations/sample2/processors/sample3/dataset" + } + request = request_type(**request_init) + + # Mock the http request call within the method and fake a response. + with mock.patch.object(type(client.transport._session), "request") as req: + # Designate an appropriate value for the returned response. 
+ return_value = document_service.ListDocumentsResponse( + next_page_token="next_page_token_value", + total_size=1086, + ) + + # Wrap the value into a proper Response obj + response_value = Response() + response_value.status_code = 200 + pb_return_value = document_service.ListDocumentsResponse.pb(return_value) + json_return_value = json_format.MessageToJson(pb_return_value) + + response_value._content = json_return_value.encode("UTF-8") + req.return_value = response_value + response = client.list_documents(request) + + # Establish that the response is the type that we expect. + assert isinstance(response, pagers.ListDocumentsPager) + assert response.next_page_token == "next_page_token_value" + assert response.total_size == 1086 + + +def test_list_documents_rest_required_fields( + request_type=document_service.ListDocumentsRequest, +): + transport_class = transports.DocumentServiceRestTransport + + request_init = {} + request_init["dataset"] = "" + request = request_type(**request_init) + pb_request = request_type.pb(request) + jsonified_request = json.loads( + json_format.MessageToJson( + pb_request, + including_default_value_fields=False, + use_integers_for_enums=False, + ) + ) + + # verify fields with default values are dropped + + unset_fields = transport_class( + credentials=ga_credentials.AnonymousCredentials() + ).list_documents._get_unset_required_fields(jsonified_request) + jsonified_request.update(unset_fields) + + # verify required fields with default values are now present + + jsonified_request["dataset"] = "dataset_value" + + unset_fields = transport_class( + credentials=ga_credentials.AnonymousCredentials() + ).list_documents._get_unset_required_fields(jsonified_request) + jsonified_request.update(unset_fields) + + # verify required fields with non-default values are left alone + assert "dataset" in jsonified_request + assert jsonified_request["dataset"] == "dataset_value" + + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="rest", + ) + request = request_type(**request_init) + + # Designate an appropriate value for the returned response. + return_value = document_service.ListDocumentsResponse() + # Mock the http request call within the method and fake a response. + with mock.patch.object(Session, "request") as req: + # We need to mock transcode() because providing default values + # for required fields will fail the real version if the http_options + # expect actual values for those fields. + with mock.patch.object(path_template, "transcode") as transcode: + # A uri without fields and an empty body will force all the + # request fields to show up in the query_params. 
+ pb_request = request_type.pb(request) + transcode_result = { + "uri": "v1/sample_method", + "method": "post", + "query_params": pb_request, + } + transcode_result["body"] = pb_request + transcode.return_value = transcode_result + + response_value = Response() + response_value.status_code = 200 + + pb_return_value = document_service.ListDocumentsResponse.pb(return_value) + json_return_value = json_format.MessageToJson(pb_return_value) + + response_value._content = json_return_value.encode("UTF-8") + req.return_value = response_value + + response = client.list_documents(request) + + expected_params = [("$alt", "json;enum-encoding=int")] + actual_params = req.call_args.kwargs["params"] + assert expected_params == actual_params + + +def test_list_documents_rest_unset_required_fields(): + transport = transports.DocumentServiceRestTransport( + credentials=ga_credentials.AnonymousCredentials + ) + + unset_fields = transport.list_documents._get_unset_required_fields({}) + assert set(unset_fields) == (set(()) & set(("dataset",))) + + +@pytest.mark.parametrize("null_interceptor", [True, False]) +def test_list_documents_rest_interceptors(null_interceptor): + transport = transports.DocumentServiceRestTransport( + credentials=ga_credentials.AnonymousCredentials(), + interceptor=None + if null_interceptor + else transports.DocumentServiceRestInterceptor(), + ) + client = DocumentServiceClient(transport=transport) + with mock.patch.object( + type(client.transport._session), "request" + ) as req, mock.patch.object( + path_template, "transcode" + ) as transcode, mock.patch.object( + transports.DocumentServiceRestInterceptor, "post_list_documents" + ) as post, mock.patch.object( + transports.DocumentServiceRestInterceptor, "pre_list_documents" + ) as pre: + pre.assert_not_called() + post.assert_not_called() + pb_message = document_service.ListDocumentsRequest.pb( + document_service.ListDocumentsRequest() + ) + transcode.return_value = { + "method": "post", + "uri": "my_uri", + "body": pb_message, + "query_params": pb_message, + } + + req.return_value = Response() + req.return_value.status_code = 200 + req.return_value.request = PreparedRequest() + req.return_value._content = document_service.ListDocumentsResponse.to_json( + document_service.ListDocumentsResponse() + ) + + request = document_service.ListDocumentsRequest() + metadata = [ + ("key", "val"), + ("cephalopod", "squid"), + ] + pre.return_value = request, metadata + post.return_value = document_service.ListDocumentsResponse() + + client.list_documents( + request, + metadata=[ + ("key", "val"), + ("cephalopod", "squid"), + ], + ) + + pre.assert_called_once() + post.assert_called_once() + + +def test_list_documents_rest_bad_request( + transport: str = "rest", request_type=document_service.ListDocumentsRequest +): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # send a request that will satisfy transcoding + request_init = { + "dataset": "projects/sample1/locations/sample2/processors/sample3/dataset" + } + request = request_type(**request_init) + + # Mock the http request call within the method and fake a BadRequest error. 
+ with mock.patch.object(Session, "request") as req, pytest.raises( + core_exceptions.BadRequest + ): + # Wrap the value into a proper Response obj + response_value = Response() + response_value.status_code = 400 + response_value.request = Request() + req.return_value = response_value + client.list_documents(request) + + +def test_list_documents_rest_flattened(): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="rest", + ) + + # Mock the http request call within the method and fake a response. + with mock.patch.object(type(client.transport._session), "request") as req: + # Designate an appropriate value for the returned response. + return_value = document_service.ListDocumentsResponse() + + # get arguments that satisfy an http rule for this method + sample_request = { + "dataset": "projects/sample1/locations/sample2/processors/sample3/dataset" + } + + # get truthy value for each flattened field + mock_args = dict( + dataset="dataset_value", + ) + mock_args.update(sample_request) + + # Wrap the value into a proper Response obj + response_value = Response() + response_value.status_code = 200 + pb_return_value = document_service.ListDocumentsResponse.pb(return_value) + json_return_value = json_format.MessageToJson(pb_return_value) + response_value._content = json_return_value.encode("UTF-8") + req.return_value = response_value + + client.list_documents(**mock_args) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(req.mock_calls) == 1 + _, args, _ = req.mock_calls[0] + assert path_template.validate( + "%s/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:listDocuments" + % client.transport._host, + args[1], + ) + + +def test_list_documents_rest_flattened_error(transport: str = "rest"): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.list_documents( + document_service.ListDocumentsRequest(), + dataset="dataset_value", + ) + + +def test_list_documents_rest_pager(transport: str = "rest"): + client = DocumentServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Mock the http request call within the method and fake a response. + with mock.patch.object(Session, "request") as req: + # TODO(kbandes): remove this mock unless there's a good reason for it. 
+ # with mock.patch.object(path_template, 'transcode') as transcode: + # Set the response as a series of pages + response = ( + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + next_page_token="abc", + ), + document_service.ListDocumentsResponse( + document_metadata=[], + next_page_token="def", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + ], + next_page_token="ghi", + ), + document_service.ListDocumentsResponse( + document_metadata=[ + document_service.DocumentMetadata(), + document_service.DocumentMetadata(), + ], + ), + ) + # Two responses for two calls + response = response + response + + # Wrap the values into proper Response objs + response = tuple( + document_service.ListDocumentsResponse.to_json(x) for x in response + ) + return_values = tuple(Response() for i in response) + for return_val, response_val in zip(return_values, response): + return_val._content = response_val.encode("UTF-8") + return_val.status_code = 200 + req.side_effect = return_values + + sample_request = { + "dataset": "projects/sample1/locations/sample2/processors/sample3/dataset" + } + + pager = client.list_documents(request=sample_request) + + results = list(pager) + assert len(results) == 6 + assert all(isinstance(i, document_service.DocumentMetadata) for i in results) + + pages = list(client.list_documents(request=sample_request).pages) + for page_, token in zip(pages, ["abc", "def", "ghi", ""]): + assert page_.raw_page.next_page_token == token + + @pytest.mark.parametrize( "request_type", [ @@ -3683,7 +4446,12 @@ def test_update_dataset_schema_rest(request_type): "name": "name_value", "value_type": "value_type_value", "occurrence_type": 1, - "property_metadata": {"inactive": True}, + "property_metadata": { + "inactive": True, + "field_extraction_metadata": { + "summary_options": {"length": 1, "format_": 1} + }, + }, } ], "entity_type_metadata": {"inactive": True}, @@ -3892,7 +4660,12 @@ def test_update_dataset_schema_rest_bad_request( "name": "name_value", "value_type": "value_type_value", "occurrence_type": 1, - "property_metadata": {"inactive": True}, + "property_metadata": { + "inactive": True, + "field_extraction_metadata": { + "summary_options": {"length": 1, "format_": 1} + }, + }, } ], "entity_type_metadata": {"inactive": True}, @@ -4130,6 +4903,7 @@ def test_document_service_base_transport(): "update_dataset", "import_documents", "get_document", + "list_documents", "batch_delete_documents", "get_dataset_schema", "update_dataset_schema", @@ -4427,6 +5201,9 @@ def test_document_service_client_transport_session_collision(transport_name): session1 = client1.transport.get_document._session session2 = client2.transport.get_document._session assert session1 != session2 + session1 = client1.transport.list_documents._session + session2 = client2.transport.list_documents._session + assert session1 != session2 session1 = client1.transport.batch_delete_documents._session session2 = client2.transport.batch_delete_documents._session assert session1 != session2