19
19
# To install the latest published package dependency, execute the following:
20
20
# pip install google-cloud-vision
21
21
22
+ # sample-metadata
23
+ # title:
24
+ # description: Perform batch file annotation
25
+ # usage: python3 samples/v1/vision_batch_annotate_files_gcs.py [--storage_uri "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"]
22
26
import sys
23
27
24
28
# [START vision_batch_annotate_files_gcs]
27
31
from google .cloud .vision_v1 import enums
28
32
import six
29
33
30
- def sample_batch_annotate_files (gcs_uri ):
31
- """Perform batch file annotation"""
34
+ def sample_batch_annotate_files (storage_uri ):
35
+ """
36
+ Perform batch file annotation
37
+
38
+ Args:
39
+ storage_uri Cloud Storage URI of the source file (e.g. a PDF) in the format gs://[bucket]/
40
+ [file]
41
+ """
32
42
# [START vision_batch_annotate_files_gcs_core]
33
43
34
44
client = vision_v1 .ImageAnnotatorClient ()
35
45
36
- # gcs_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf'
46
+ # storage_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf'
37
47
38
- if isinstance (gcs_uri , six .binary_type ):
39
- gcs_uri = gcs_uri .decode ('utf-8' )
40
- gcs_source = {'uri' : gcs_uri }
48
+ if isinstance (storage_uri , six .binary_type ):
49
+ storage_uri = storage_uri .decode ('utf-8' )
50
+ gcs_source = {'uri' : storage_uri }
41
51
input_config = {'gcs_source' : gcs_source }
42
52
type_ = enums .Feature .Type .DOCUMENT_TEXT_DETECTION
43
53
features_element = {'type' : type_ }
44
54
features = [features_element ]
45
55
46
- # The service can process up to 5 pages per document file. Here we specify the
47
- # first, second, and last page of the document to be processed.
56
+ # The service can process up to 5 pages per document file.
57
+ # Here we specify the first, second, and last page of the document to be
58
+ # processed.
48
59
pages_element = 1
49
60
pages_element_2 = 2
50
61
pages_element_3 = - 1
@@ -57,7 +68,6 @@ def sample_batch_annotate_files(gcs_uri):
57
68
print ('Full text: {}' .format (image_response .full_text_annotation .text ))
58
69
for page in image_response .full_text_annotation .pages :
59
70
for block in page .blocks :
60
- # The service also returns the bounding boxes for blocks, paragraphs, words, and symbols.
61
71
print ('\n Block confidence: {}' .format (block .confidence ))
62
72
for par in block .paragraphs :
63
73
print ('\t Paragraph confidence: {}' .format (par .confidence ))
@@ -73,10 +83,10 @@ def main():
73
83
import argparse
74
84
75
85
parser = argparse .ArgumentParser ()
76
- parser .add_argument ('--gcs_uri ' , type = str , default = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' )
86
+ parser .add_argument ('--storage_uri ' , type = str , default = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' )
77
87
args = parser .parse_args ()
78
88
79
- sample_batch_annotate_files (args .gcs_uri )
89
+ sample_batch_annotate_files (args .storage_uri )
80
90
81
91
if __name__ == '__main__' :
82
92
main ()
0 commit comments