32
32
from google .generativeai .types .model_types import idecode_time
33
33
from google .generativeai .utils import flatten_update_paths
34
34
35
# A resource ID is 1-40 characters drawn from lowercase alphanumerics and
# dashes, and it must start and end with an alphanumeric character. The
# trailing group is optional so that single-character IDs are accepted; the
# quantifier bounds (1 + up to 38 + 1) enforce the 40-character ceiling.
_VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])?$"
NAME_ERROR_MSG = """The `name` must consist of alphanumeric characters (or -) and be 40 or fewer characters. The name you entered:
\t len(name)== {length}
\t name={name}
"""


def valid_name(name: str) -> bool:
    """Return True when `name` is an acceptable resource ID.

    Args:
        name: The bare ID (without any "corpora/.../" prefix) to validate.

    Returns:
        True if `name` is 1 to 40 characters long, uses only lowercase
        alphanumerics or dashes, and neither starts nor ends with a dash;
        False otherwise.
    """
    # `fullmatch` (rather than `match` + `$`) rejects a trailing newline, and
    # the pattern itself caps the length at 40, so no separate len() check is
    # needed. The explicit comparison yields a real bool instead of a Match.
    return re.fullmatch(_VALID_NAME, name) is not None
35
44
36
- _DOCUMENT_NAME_REGEX = re .compile (r"^corpora/[a-z0-9-]+/documents/[a-z0-9-]+$" )
37
- _CHUNK_NAME_REGEX = re .compile (r"^corpora/([^/]+?)(/documents/([^/]+?)(/chunks/([^/]+?))?)?$" )
38
- _REMOVE = string .punctuation
39
- _REMOVE = _REMOVE .replace ("-" , "" ) # Don't remove hyphens
40
- _PATTERN = r"[{}]" .format (_REMOVE ) # Create the pattern
41
45
42
46
Operator = glm .Condition .Operator
43
47
State = glm .Chunk .State
@@ -180,7 +184,7 @@ class Corpus:
180
184
181
185
def create_document (
182
186
self ,
183
- name : Optional [ str ] = None ,
187
+ name : str ,
184
188
display_name : Optional [str ] = None ,
185
189
custom_metadata : Optional [list [CustomMetadata ]] = None ,
186
190
client : glm .RetrieverServiceClient | None = None ,
@@ -203,32 +207,22 @@ def create_document(
203
207
if client is None :
204
208
client = get_default_retriever_client ()
205
209
206
- if not name and not display_name :
207
- raise ValueError ("Either the document name or display name must be specified." )
208
-
209
210
document = None
210
- if name :
211
- if re .match (_DOCUMENT_NAME_REGEX , name ):
212
- document = glm .Document (
213
- name = name , display_name = display_name , custom_metadata = custom_metadata
214
- )
215
- elif f"corpora/{ self .name } /documents/" not in name :
216
- document_name = f"{ self .name } /documents/" + re .sub (_PATTERN , "" , name )
217
- document = glm .Document (
218
- name = document_name , display_name = display_name , custom_metadata = custom_metadata
219
- )
220
- else :
221
- raise ValueError (
222
- f"Document name must be formatted as { self .name } /document/<document_name>."
223
- )
211
+ if valid_name (name ):
212
+ document_name = f"{ self .name } /documents/{ name } "
213
+ document = glm .Document (
214
+ name = document_name , display_name = display_name , custom_metadata = custom_metadata
215
+ )
216
+ else :
217
+ raise ValueError (NAME_ERROR_MSG .format (length = len (name ), name = name ))
224
218
225
219
request = glm .CreateDocumentRequest (parent = self .name , document = document )
226
220
response = client .create_document (request )
227
221
return decode_document (response )
228
222
229
223
async def create_document_async (
230
224
self ,
231
- name : Optional [ str ] = None ,
225
+ name : str ,
232
226
display_name : Optional [str ] = None ,
233
227
custom_metadata : Optional [list [CustomMetadata ]] = None ,
234
228
client : glm .RetrieverServiceAsyncClient | None = None ,
@@ -237,24 +231,14 @@ async def create_document_async(
237
231
if client is None :
238
232
client = get_default_retriever_async_client ()
239
233
240
- if not name and not display_name :
241
- raise ValueError ("Either the document name or display name must be specified." )
242
-
243
234
document = None
244
- if name :
245
- if re .match (_DOCUMENT_NAME_REGEX , name ):
246
- document = glm .Document (
247
- name = name , display_name = display_name , custom_metadata = custom_metadata
248
- )
249
- elif f"corpora/{ self .name } /documents/" not in name :
250
- document_name = f"{ self .name } /documents/" + re .sub (_PATTERN , "" , name )
251
- document = glm .Document (
252
- name = document_name , display_name = display_name , custom_metadata = custom_metadata
253
- )
254
- else :
255
- raise ValueError (
256
- f"Document name must be formatted as { self .name } /document/<document_name>."
257
- )
235
+ if valid_name (name ):
236
+ document_name = f"{ self .name } /documents/{ name } "
237
+ document = glm .Document (
238
+ name = document_name , display_name = display_name , custom_metadata = custom_metadata
239
+ )
240
+ else :
241
+ raise ValueError (NAME_ERROR_MSG .format (length = len (name ), name = name ))
258
242
259
243
request = glm .CreateDocumentRequest (parent = self .name , document = document )
260
244
response = await client .create_document (request )
@@ -431,7 +415,7 @@ async def query_async(
431
415
def delete_document (
432
416
self ,
433
417
name : str ,
434
- force : Optional [ bool ] = None ,
418
+ force : bool = False ,
435
419
client : glm .RetrieverServiceClient | None = None ,
436
420
):
437
421
"""
@@ -450,7 +434,7 @@ def delete_document(
450
434
async def delete_document_async (
451
435
self ,
452
436
name : str ,
453
- force : Optional [ bool ] = None ,
437
+ force : bool = False ,
454
438
client : glm .RetrieverServiceAsyncClient | None = None ,
455
439
):
456
440
"""This is the async version of `Corpus.delete_document`."""
@@ -528,17 +512,17 @@ class Document(abc.ABC):
528
512
529
513
def create_chunk (
530
514
self ,
531
- name : Optional [str ],
532
515
data : str | ChunkData ,
516
+ name : Optional [str ] = None ,
533
517
custom_metadata : Optional [list [CustomMetadata ]] = None ,
534
518
client : glm .RetrieverServiceClient | None = None ,
535
519
) -> Chunk :
536
520
"""
537
521
Create a `Chunk` object which has textual data.
538
522
539
523
Args:
540
- name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-).
541
524
data: The content for the `Chunk`, such as the text string.
525
+ name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-).
542
526
custom_metadata: User provided custom metadata stored as key-value pairs.
543
527
state: States for the lifecycle of a `Chunk`.
544
528
@@ -551,17 +535,13 @@ def create_chunk(
551
535
if client is None :
552
536
client = get_default_retriever_client ()
553
537
554
- chunk_name , chunk = "" , None
555
- if name :
556
- if re .match (_CHUNK_NAME_REGEX , name ):
557
- chunk_name = name
558
-
559
- elif "chunks/" not in name :
560
- chunk_name = f"{ self .name } /chunks/" + re .sub (_PATTERN , "" , name )
561
- else :
562
- raise ValueError (
563
- f"Chunk name must be formatted as { self .name } /chunks/<chunk_name>."
564
- )
538
+ chunk_name , chunk = None , None
539
+ if name is None :
540
+ chunk_name = None
541
+ elif valid_name (name ):
542
+ chunk_name = f"{ self .name } /chunks/{ name } "
543
+ else :
544
+ raise ValueError (NAME_ERROR_MSG .format (length = len (name ), name = name ))
565
545
566
546
if isinstance (data , str ):
567
547
chunk = glm .Chunk (
@@ -580,26 +560,22 @@ def create_chunk(
580
560
581
561
async def create_chunk_async (
582
562
self ,
583
- name : Optional [str ],
584
563
data : str | ChunkData ,
564
+ name : Optional [str ] = None ,
585
565
custom_metadata : Optional [list [CustomMetadata ]] = None ,
586
566
client : glm .RetrieverServiceAsyncClient | None = None ,
587
567
) -> Chunk :
588
568
"""This is the async version of `Document.create_chunk`."""
589
569
if client is None :
590
570
client = get_default_retriever_async_client ()
591
571
592
- chunk_name , chunk = "" , None
593
- if name :
594
- if re .match (_CHUNK_NAME_REGEX , name ):
595
- chunk_name = name
596
-
597
- elif "chunks/" not in name :
598
- chunk_name = f"{ self .name } /chunks/" + re .sub (_PATTERN , "" , name )
599
- else :
600
- raise ValueError (
601
- f"Chunk name must be formatted as { self .name } /chunks/<chunk_name>."
602
- )
572
+ chunk_name , chunk = None , None
573
+ if name is None :
574
+ chunk_name = None
575
+ elif valid_name (name ):
576
+ chunk_name = f"{ self .name } /chunks/{ name } "
577
+ else :
578
+ raise ValueError (NAME_ERROR_MSG .format (length = len (name ), name = name ))
603
579
604
580
if isinstance (data , str ):
605
581
chunk = glm .Chunk (
0 commit comments