From 1c8bdc7e0079d64528da9c6497b1691e18bc94b2 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Wed, 31 Jan 2024 16:09:22 -0800 Subject: [PATCH 01/20] Updated naming regex, made naming process simpler. --- google/generativeai/retriever.py | 53 ++++----- google/generativeai/types/retriever_types.py | 116 +++++++++---------- 2 files changed, 81 insertions(+), 88 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 091621415..01052423c 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -30,14 +30,11 @@ from google.generativeai.types import safety_types from google.generativeai.types.model_types import idecode_time -_CORPORA_NAME_REGEX = re.compile(r"^corpora/[a-z0-9-]+") -_REMOVE = string.punctuation -_REMOVE = _REMOVE.replace("-", "") # Don't remove hyphens -_PATTERN = r"[{}]".format(_REMOVE) # Create the pattern +_VALID_NAME = r"^[^-][a-zA-Z\d-][^-]+$" def create_corpus( - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, client: glm.RetrieverServiceClient | None = None, ) -> retriever_types.Corpus: @@ -62,18 +59,18 @@ def create_corpus( if client is None: client = get_default_retriever_client() - if not name and not display_name: - raise ValueError("Either the corpus name or display name must be specified.") - corpus = None - if name: - if re.match(_CORPORA_NAME_REGEX, name): - corpus = glm.Corpus(name=name, display_name=display_name) - elif "corpora/" not in name: - corpus_name = "corpora/" + re.sub(_PATTERN, "", name) - corpus = glm.Corpus(name=corpus_name, display_name=display_name) - else: - raise ValueError("Corpus name must be formatted as corpora/.") + if re.match(_VALID_NAME, name) and len(name) < 40: + corpus_name = "corpora/" + name # Construct the name + corpus = glm.Corpus(name=corpus_name, display_name=display_name) + else: + raise ValueError( + """ + `Corpus` name must be formatted as corpora/. 
Enter a `corpus_name` that + contains alphanumeric characters and/or dashes, but the name must not begin or end + with a dash. The name entered for the `Corpus` must be less than 40 characters. + """ + ) request = glm.CreateCorpusRequest(corpus=corpus) response = client.create_corpus(request) @@ -93,21 +90,21 @@ async def create_corpus_async( if client is None: client = get_default_retriever_async_client() - if not name and not display_name: - raise ValueError("Either the corpus name or display name must be specified.") - corpus = None - if name: - if re.match(_CORPORA_NAME_REGEX, name): - corpus = glm.Corpus(name=name, display_name=display_name) - elif "corpora/" not in name: - corpus_name = "corpora/" + re.sub(_PATTERN, "", name) - corpus = glm.Corpus(name=corpus_name, display_name=display_name) - else: - raise ValueError("Corpus name must be formatted as corpora/.") + if re.match(_VALID_NAME, name) and len(name) < 40: + corpus_name = "corpora/" + name # Construct the name + corpus = glm.Corpus(name=corpus_name, display_name=display_name) + else: + raise ValueError( + """ + `Corpus` name must be formatted as corpora/. Enter a `corpus_name` that + contains alphanumeric characters and/or dashes, but the name must not begin or end + with a dash. The name entered for the `Corpus` must be less than 40 characters. 
+ """ + ) request = glm.CreateCorpusRequest(corpus=corpus) - response = await client.create_corpus(request) + response = client.create_corpus(request) response = type(response).to_dict(response) idecode_time(response, "create_time") idecode_time(response, "update_time") diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 36784327c..647443095 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -33,11 +33,12 @@ from google.generativeai.utils import flatten_update_paths -_DOCUMENT_NAME_REGEX = re.compile(r"^corpora/[a-z0-9-]+/documents/[a-z0-9-]+$") -_CHUNK_NAME_REGEX = re.compile(r"^corpora/([^/]+?)(/documents/([^/]+?)(/chunks/([^/]+?))?)?$") -_REMOVE = string.punctuation -_REMOVE = _REMOVE.replace("-", "") # Don't remove hyphens -_PATTERN = r"[{}]".format(_REMOVE) # Create the pattern +# _DOCUMENT_NAME_REGEX = re.compile(r"^corpora/[a-z0-9-]+/documents/[a-z0-9-]+$") +# _CHUNK_NAME_REGEX = re.compile(r"^corpora/([^/]+?)(/documents/([^/]+?)(/chunks/([^/]+?))?)?$") +# _REMOVE = string.punctuation +# _REMOVE = _REMOVE.replace("-", "") # Don't remove hyphens +# _PATTERN = r"[{}]".format(_REMOVE) # Create the pattern +_VALID_NAME = r"^[^-][a-zA-Z\d-][^-]+$" Operator = glm.Condition.Operator State = glm.Chunk.State @@ -174,7 +175,7 @@ class Corpus: def create_document( self, - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceClient | None = None, @@ -197,24 +198,20 @@ def create_document( if client is None: client = get_default_retriever_client() - if not name and not display_name: - raise ValueError("Either the document name or display name must be specified.") - document = None - if name: - if re.match(_DOCUMENT_NAME_REGEX, name): - document = glm.Document( - name=name, display_name=display_name, custom_metadata=custom_metadata - ) - elif 
f"corpora/{self.name}/documents/" not in name: - document_name = f"{self.name}/documents/" + re.sub(_PATTERN, "", name) - document = glm.Document( - name=document_name, display_name=display_name, custom_metadata=custom_metadata - ) - else: - raise ValueError( - f"Document name must be formatted as {self.name}/document/." - ) + if re.match(_VALID_NAME, name) and len(name) < 40: + document_name = f"{self.name}/documents/{name}" + document = glm.Document( + name=document_name, display_name=display_name, custom_metadata=custom_metadata + ) + else: + raise ValueError( + f""" + Document name must be formatted as {self.name}/documents/. Enter a + `document_name` that contains alphanumeric characters and/or dashes, but the name must not + begin or end with a dash. The name entered for the `Document` must be less than 40 characters. + """ + ) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = client.create_document(request) @@ -222,7 +219,7 @@ def create_document( async def create_document_async( self, - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, @@ -235,20 +232,19 @@ async def create_document_async( raise ValueError("Either the document name or display name must be specified.") document = None - if name: - if re.match(_DOCUMENT_NAME_REGEX, name): - document = glm.Document( - name=name, display_name=display_name, custom_metadata=custom_metadata - ) - elif f"corpora/{self.name}/documents/" not in name: - document_name = f"{self.name}/documents/" + re.sub(_PATTERN, "", name) - document = glm.Document( - name=document_name, display_name=display_name, custom_metadata=custom_metadata - ) - else: - raise ValueError( - f"Document name must be formatted as {self.name}/document/." 
- ) + if re.match(_VALID_NAME, name) and len(name) < 40: + document_name = f"{self.name}/documents/{name}" + document = glm.Document( + name=document_name, display_name=display_name, custom_metadata=custom_metadata + ) + else: + raise ValueError( + f""" + Document name must be formatted as {self.name}/documents/. Enter a + `document_name` that contains alphanumeric characters and/or dashes, but the name must not + begin or end with a dash. The name entered for the `Document` must be less than 40 characters. + """ + ) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = await client.create_document(request) @@ -506,7 +502,7 @@ class Document(abc.ABC): def create_chunk( self, - name: Optional[str], + name: str, data: str | ChunkData, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceClient | None = None, @@ -530,16 +526,16 @@ def create_chunk( client = get_default_retriever_client() chunk_name, chunk = "", None - if name: - if re.match(_CHUNK_NAME_REGEX, name): - chunk_name = name - - elif "chunks/" not in name: - chunk_name = f"{self.name}/chunks/" + re.sub(_PATTERN, "", name) - else: - raise ValueError( - f"Chunk name must be formatted as {self.name}/chunks/." - ) + if re.match(_VALID_NAME, name) and len(name) < 40: + chunk_name = f"{self.name}/chunks/{name}" + else: + raise ValueError( + f""" + Chunk name must be formatted as {self.name}/chunks/. Enter a `chunk_name` that + contains alphanumeric characters and/or dashes, but the name must not begin or end with a dash. + The name entered for the `Chunk` must be less than 40 characters. 
+ """ + ) if isinstance(data, str): chunk = glm.Chunk( @@ -558,7 +554,7 @@ def create_chunk( async def create_chunk_async( self, - name: Optional[str], + name: str, data: str | ChunkData, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, @@ -568,16 +564,16 @@ async def create_chunk_async( client = get_default_retriever_async_client() chunk_name, chunk = "", None - if name: - if re.match(_CHUNK_NAME_REGEX, name): - chunk_name = name - - elif "chunks/" not in name: - chunk_name = f"{self.name}/chunks/" + re.sub(_PATTERN, "", name) - else: - raise ValueError( - f"Chunk name must be formatted as {self.name}/chunks/." - ) + if re.match(_VALID_NAME, name) and len(name) < 40: + chunk_name = f"{self.name}/chunks/{name}" + else: + raise ValueError( + f""" + Chunk name must be formatted as {self.name}/chunks/. Enter a `chunk_name` that + contains alphanumeric characters and/or dashes, but the name must not begin or end with a dash. + The name entered for the `Chunk` must be less than 40 characters. 
+ """ + ) if isinstance(data, str): chunk = glm.Chunk( From 7e5caeb2c1bb184f22ae50771210c3d89a8b9127 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 11:00:20 -0800 Subject: [PATCH 02/20] Fixed non async test cases for naming --- google/generativeai/retriever.py | 2 +- google/generativeai/types/retriever_types.py | 14 +- tests/test_retriever.py | 161 ++++++------------- 3 files changed, 57 insertions(+), 120 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 01052423c..e26c19315 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -82,7 +82,7 @@ def create_corpus( async def create_corpus_async( - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, client: glm.RetrieverServiceAsyncClient | None = None, ) -> retriever_types.Corpus: diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index d6ee6d735..e1f1dfaf6 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -32,12 +32,6 @@ from google.generativeai.types.model_types import idecode_time from google.generativeai.utils import flatten_update_paths - -# _DOCUMENT_NAME_REGEX = re.compile(r"^corpora/[a-z0-9-]+/documents/[a-z0-9-]+$") -# _CHUNK_NAME_REGEX = re.compile(r"^corpora/([^/]+?)(/documents/([^/]+?)(/chunks/([^/]+?))?)?$") -# _REMOVE = string.punctuation -# _REMOVE = _REMOVE.replace("-", "") # Don't remove hyphens -# _PATTERN = r"[{}]".format(_REMOVE) # Create the pattern _VALID_NAME = r"^[^-][a-zA-Z\d-][^-]+$" Operator = glm.Condition.Operator @@ -641,7 +635,7 @@ def batch_create_chunks( _requests.append(c) elif isinstance(chunk, Mapping): for key, value in chunk.items(): - if re.match(_CHUNK_NAME_REGEX, value): + if re.match(_VALID_NAME, value): name = value elif isinstance(value, str): data = chunk[key] @@ -658,7 +652,7 @@ def batch_create_chunks( _requests.append(c) elif 
isinstance(chunk, tuple): for item in chunk: - if re.match(_CHUNK_NAME_REGEX, item): + if re.match(_VALID_NAME, item): name = item elif isinstance(item, str): data = item @@ -710,7 +704,7 @@ async def batch_create_chunks_async( _requests.append(c) elif isinstance(chunk, Mapping): for key, value in chunk.items(): - if re.match(_CHUNK_NAME_REGEX, value): + if re.match(_VALID_NAME, value): name = value elif isinstance(value, str): data = chunk[key] @@ -727,7 +721,7 @@ async def batch_create_chunks_async( _requests.append(c) elif isinstance(chunk, tuple): for item in chunk: - if re.match(_CHUNK_NAME_REGEX, item): + if re.match(_VALID_NAME, item): name = item elif isinstance(item, str): data = item diff --git a/tests/test_retriever.py b/tests/test_retriever.py index a0f9e4395..f00faf945 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -313,32 +313,19 @@ def batch_delete_chunks( ) -> None: self.observed_requests.append(request) - def test_create_corpus(self, display_name="demo_corpus"): - x = retriever.create_corpus(display_name=display_name) + def test_create_corpus(self, name="demo_corpus"): + x = retriever.create_corpus(name=name) self.assertIsInstance(x, retriever_service.Corpus) self.assertEqual("demo_corpus", x.display_name) self.assertEqual("corpora/demo_corpus", x.name) - @parameterized.named_parameters( - [ - dict(testcase_name="match_corpora_regex", name="corpora/demo_corpus"), - dict(testcase_name="no_corpora", name="demo_corpus"), - dict(testcase_name="with_punctuation", name="corpora/demo_corpus*(*)"), - dict(testcase_name="dash_at_start", name="-demo_corpus"), - ] - ) - def test_create_corpus_names(self, name): + def test_get_corpus(self, name="demo_corpus"): x = retriever.create_corpus(name=name) - self.assertEqual("demo_corpus", x.display_name) - self.assertEqual("corpora/demo_corpus", x.name) - - def test_get_corpus(self, display_name="demo_corpus"): - x = retriever.create_corpus(display_name=display_name) c = 
retriever.get_corpus(name=x.name) self.assertEqual("demo_corpus", c.display_name) def test_update_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") + demo_corpus = retriever.create_corpus(name="demo_corpus") update_request = demo_corpus.update(updates={"display_name": "demo_corpus_1"}) self.assertIsInstance(self.observed_requests[-1], glm.UpdateCorpusRequest) self.assertEqual("demo_corpus_1", demo_corpus.display_name) @@ -348,10 +335,10 @@ def test_list_corpora(self): self.assertEqual(len(x), 2) def test_query_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") demo_chunk = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) q = demo_corpus.query(query="What kind of chunk is this?") @@ -373,65 +360,47 @@ def test_query_corpus(self): ) def test_delete_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") delete_request = retriever.delete_corpus(name="corpora/demo_corpus", force=True) self.assertIsInstance(self.observed_requests[-1], glm.DeleteCorpusRequest) - def test_create_document(self, display_name="demo_doc"): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - x = demo_corpus.create_document(display_name=display_name) + def test_create_document(self, name="demo_doc"): + demo_corpus = retriever.create_corpus(name="demo_corpus") + x = demo_corpus.create_document(name=name) self.assertIsInstance(x, retriever_service.Document) self.assertEqual("demo_doc", x.display_name) - 
@parameterized.named_parameters( - [ - dict( - testcase_name="match_document_regex", name="corpora/demo_corpus/documents/demo_doc" - ), - dict(testcase_name="no_document", name="corpora/demo_corpus/demo_document"), - dict( - testcase_name="with_punctuation", name="corpora/demo_corpus*(*)/documents/demo_doc" - ), - dict(testcase_name="dash_at_start", name="-demo_doc"), - ] - ) - def test_create_document_name(self, name): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") + def test_get_document(self, name="demo_doc"): + demo_corpus = retriever.create_corpus(name="demo_corpus") x = demo_corpus.create_document(name=name) - self.assertEqual("corpora/demo_corpus/documents/demo_doc", x.name) - self.assertEqual("demo_doc", x.display_name) - - def test_get_document(self, display_name="demo_doc"): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - x = demo_corpus.create_document(display_name=display_name) d = demo_corpus.get_document(name=x.name) self.assertEqual("demo_doc", d.display_name) def test_update_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") update_request = demo_document.update(updates={"display_name": "demo_doc_1"}) self.assertEqual("demo_doc_1", demo_document.display_name) def test_delete_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - demo_doc2 = demo_corpus.create_document(display_name="demo_doc_2") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") + demo_doc2 = demo_corpus.create_document(name="demo_doc_2") delete_request = demo_corpus.delete_document(name="corpora/demo_corpus/documents/demo_doc") 
self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) def test_list_documents(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - demo_doc2 = demo_corpus.create_document(display_name="demo_doc_2") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") + demo_doc2 = demo_corpus.create_document(name="demo_doc_2") self.assertLen(list(demo_corpus.list_documents()), 2) def test_query_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") demo_chunk = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) q = demo_document.query(query="What kind of chunk is this?") @@ -453,40 +422,16 @@ def test_query_document(self): ) def test_create_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_chunk_regex", - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", - ), - dict(testcase_name="no_chunk", 
name="corpora/demo_corpus/demo_document/demo_chunk"), - dict( - testcase_name="with_punctuation", - name="corpora/demo_corpus*(*)/documents/demo_doc/chunks*****/demo_chunk", - ), - dict(testcase_name="dash_at_start", name="-demo_chunk"), - dict(testcase_name="empty_value", name=""), - ] - ) - def test_create_chunk_name(self, name): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - x = demo_document.create_chunk( - name=name, - data="This is a demo chunk.", - ) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) - @parameterized.named_parameters( [ dict( @@ -518,32 +463,32 @@ def test_create_chunk_name(self, name): ] ) def test_batch_create_chunks(self, chunks): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") chunks = demo_document.batch_create_chunks(chunks=chunks) self.assertIsInstance(self.observed_requests[-1], glm.BatchCreateChunksRequest) self.assertEqual("This is a demo chunk.", chunks[0].data.string_value) self.assertEqual("This is another demo chunk.", chunks[1].data.string_value) def test_get_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) ch = demo_document.get_chunk(name=x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), ch.data) def test_list_chunks(self): - demo_corpus = 
retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo_chunk_1", data="This is another demo chunk.", ) @@ -552,10 +497,10 @@ def test_list_chunks(self): self.assertLen(list_req, 2) def test_update_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) x.update(updates={"data": {"string_value": "This is an updated demo chunk."}}) @@ -593,14 +538,14 @@ def test_update_chunk(self): ], ) def test_batch_update_chunks_data_structures(self, updates): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo_chunk_1", data="This is another demo chunk.", ) update_request = demo_document.batch_update_chunks(chunks=updates) @@ -613,26 +558,24 @@ def test_batch_update_chunks_data_structures(self, 
updates): ) def test_delete_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) - delete_request = demo_document.delete_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk" - ) + delete_request = demo_document.delete_chunk(name="demo_chunk") self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) def test_batch_delete_chunks(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo_corpus") + demo_document = demo_corpus.create_document(name="demo_doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo_chunk", data="This is another demo chunk.", ) delete_request = demo_document.batch_delete_chunks(chunks=[x.name, y.name]) From a2346d452629de09c661ad5f3709148bc2073a09 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 12:14:42 -0800 Subject: [PATCH 03/20] updated regex and made non async test cases pass --- google/generativeai/retriever.py | 9 +- google/generativeai/types/retriever_types.py | 2 +- tests/test_retriever.py | 167 ++++++++++--------- 3 files changed, 90 insertions(+), 88 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 661691e02..177ab24de 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -25,8 
+25,7 @@ from google.generativeai.client import get_default_retriever_async_client from google.generativeai.types import retriever_types from google.generativeai.types.model_types import idecode_time - -_VALID_NAME = r"^[^-][a-zA-Z\d-][^-]+$" +from google.generativeai.types.retriever_types import _VALID_NAME def create_corpus( @@ -62,9 +61,9 @@ def create_corpus( else: raise ValueError( """ - `Corpus` name must be formatted as corpora/. Enter a `corpus_name` that - contains alphanumeric characters and/or dashes, but the name must not begin or end - with a dash. The name entered for the `Corpus` must be less than 40 characters. + Enter an alphanumeric name which can contain dashes that is less than 40 characters, + but the name must not begin or end with a dash for the `Corpus`. The name + entered will be formatted as corpora/. """ ) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index cd8136731..a466a683a 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -32,7 +32,7 @@ from google.generativeai.types.model_types import idecode_time from google.generativeai.utils import flatten_update_paths -_VALID_NAME = r"^[^-][a-zA-Z\d-][^-]+$" +_VALID_NAME = r"^[^-][a-z0-9\-]+[^-]$" Operator = glm.Condition.Operator State = glm.Chunk.State diff --git a/tests/test_retriever.py b/tests/test_retriever.py index f00faf945..e9c38e5d6 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -46,7 +46,7 @@ def create_corpus( self.observed_requests.append(request) return glm.Corpus( name="corpora/demo_corpus", - display_name="demo_corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -58,7 +58,7 @@ def get_corpus( self.observed_requests.append(request) return glm.Corpus( name="corpora/demo_corpus", - display_name="demo_corpus", + display_name="demo-corpus", 
create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -70,7 +70,7 @@ def update_corpus( self.observed_requests.append(request) return glm.Corpus( name="corpora/demo_corpus", - display_name="demo_corpus_1", + display_name="demo-corpus_1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -83,13 +83,13 @@ def list_corpora( return [ glm.Corpus( name="corpora/demo_corpus_1", - display_name="demo_corpus_1", + display_name="demo-corpus_1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Corpus( name="corpora/demo_corpus_2", - display_name="demo_corpus_2", + display_name="demo-corpus_2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), @@ -105,7 +105,7 @@ def query_corpus( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -127,7 +127,7 @@ def create_document( self.observed_requests.append(request) return glm.Document( name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -139,7 +139,7 @@ def get_document( self.observed_requests.append(request) return glm.Document( name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -151,7 +151,7 @@ def update_document( self.observed_requests.append(request) return glm.Document( name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc_1", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -164,13 +164,13 @@ def list_documents( return [ 
glm.Document( name="corpora/demo_corpus/documents/demo_doc_1", - display_name="demo_doc_1", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Document( name="corpora/demo_corpus/documents/demo_doc_2", - display_name="demo_doc_2", + display_name="demo-doc-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), @@ -192,7 +192,7 @@ def query_document( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -209,7 +209,7 @@ def create_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -243,7 +243,7 @@ def get_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -256,7 +256,7 @@ def list_chunks( self.observed_requests.append(request) return [ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -273,7 +273,7 @@ def list_chunks( def update_chunk(request: glm.UpdateChunkRequest) -> glm.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is an updated demo 
chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -287,7 +287,7 @@ def batch_update_chunks( return glm.BatchUpdateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is an updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -313,32 +313,32 @@ def batch_delete_chunks( ) -> None: self.observed_requests.append(request) - def test_create_corpus(self, name="demo_corpus"): + def test_create_corpus(self, name="demo-corpus"): x = retriever.create_corpus(name=name) self.assertIsInstance(x, retriever_service.Corpus) - self.assertEqual("demo_corpus", x.display_name) + self.assertEqual("demo-corpus", x.display_name) self.assertEqual("corpora/demo_corpus", x.name) - def test_get_corpus(self, name="demo_corpus"): + def test_get_corpus(self, name="demo-corpus"): x = retriever.create_corpus(name=name) c = retriever.get_corpus(name=x.name) - self.assertEqual("demo_corpus", c.display_name) + self.assertEqual("demo-corpus", c.display_name) def test_update_corpus(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - update_request = demo_corpus.update(updates={"display_name": "demo_corpus_1"}) + demo_corpus = retriever.create_corpus(name="demo-corpus") + update_request = demo_corpus.update(updates={"display_name": "demo-corpus_1"}) self.assertIsInstance(self.observed_requests[-1], glm.UpdateCorpusRequest) - self.assertEqual("demo_corpus_1", demo_corpus.display_name) + self.assertEqual("demo-corpus_1", demo_corpus.display_name) def test_list_corpora(self): x = list(retriever.list_corpora(page_size=1)) self.assertEqual(len(x), 2) def test_query_corpus(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = 
demo_corpus.create_document(name="demo-doc") demo_chunk = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = demo_corpus.query(query="What kind of chunk is this?") @@ -348,7 +348,7 @@ def test_query_corpus(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -360,47 +360,47 @@ def test_query_corpus(self): ) def test_delete_corpus(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") delete_request = retriever.delete_corpus(name="corpora/demo_corpus", force=True) self.assertIsInstance(self.observed_requests[-1], glm.DeleteCorpusRequest) - def test_create_document(self, name="demo_doc"): - demo_corpus = retriever.create_corpus(name="demo_corpus") - x = demo_corpus.create_document(name=name) + def test_create_document(self, display_name="demo-doc"): + demo_corpus = retriever.create_corpus(name="demo-corpus") + x = demo_corpus.create_document(name=display_name) self.assertIsInstance(x, retriever_service.Document) - self.assertEqual("demo_doc", x.display_name) + self.assertEqual("demo-doc", x.display_name) - def test_get_document(self, name="demo_doc"): - demo_corpus = retriever.create_corpus(name="demo_corpus") - x = demo_corpus.create_document(name=name) + def test_get_document(self, display_name="demo-doc"): + demo_corpus = retriever.create_corpus(name="demo-corpus") + x = demo_corpus.create_document(name=display_name) d = demo_corpus.get_document(name=x.name) - self.assertEqual("demo_doc", d.display_name) + self.assertEqual("demo-doc", d.display_name) def test_update_document(self): - demo_corpus = 
retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") - update_request = demo_document.update(updates={"display_name": "demo_doc_1"}) - self.assertEqual("demo_doc_1", demo_document.display_name) + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + update_request = demo_document.update(updates={"display_name": "demo-doc-1"}) + self.assertEqual("demo-doc-1", demo_document.display_name) def test_delete_document(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") - demo_doc2 = demo_corpus.create_document(name="demo_doc_2") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + demo_doc2 = demo_corpus.create_document(name="demo-doc-2") delete_request = demo_corpus.delete_document(name="corpora/demo_corpus/documents/demo_doc") self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) def test_list_documents(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") - demo_doc2 = demo_corpus.create_document(name="demo_doc_2") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + demo_doc2 = demo_corpus.create_document(name="demo-doc-2") self.assertLen(list(demo_corpus.list_documents()), 2) def test_query_document(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") demo_chunk = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = demo_document.query(query="What kind of chunk is this?") @@ -410,7 
+410,7 @@ def test_query_document(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -422,16 +422,17 @@ def test_query_document(self): ) def test_create_chunk(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertEqual("demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + @parameterized.named_parameters( [ dict( @@ -463,32 +464,32 @@ def test_create_chunk(self): ] ) def test_batch_create_chunks(self, chunks): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") chunks = demo_document.batch_create_chunks(chunks=chunks) self.assertIsInstance(self.observed_requests[-1], glm.BatchCreateChunksRequest) self.assertEqual("This is a demo chunk.", chunks[0].data.string_value) self.assertEqual("This is another demo chunk.", chunks[1].data.string_value) def test_get_chunk(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", 
data="This is a demo chunk.", ) ch = demo_document.get_chunk(name=x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), ch.data) def test_list_chunks(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) @@ -497,10 +498,10 @@ def test_list_chunks(self): self.assertLen(list_req, 2) def test_update_chunk(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) x.update(updates={"data": {"string_value": "This is an updated demo chunk."}}) @@ -514,7 +515,7 @@ def test_update_chunk(self): dict( testcase_name="dictionary_of_updates", updates={ - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk": { + "corpora/demo_corpus/documents/demo_doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1": { @@ -526,7 +527,7 @@ def test_update_chunk(self): testcase_name="list_of_tuples", updates=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + "corpora/demo_corpus/documents/demo_doc/chunks/demo-chunk", {"data": {"string_value": "This is an updated chunk."}}, ), ( @@ -538,14 +539,14 @@ def test_update_chunk(self): ], ) def test_batch_update_chunks_data_structures(self, updates): - demo_corpus = retriever.create_corpus(name="demo_corpus") - 
demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) update_request = demo_document.batch_update_chunks(chunks=updates) @@ -558,24 +559,26 @@ def test_batch_update_chunks_data_structures(self, updates): ) def test_delete_chunk(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) - delete_request = demo_document.delete_chunk(name="demo_chunk") + delete_request = demo_document.delete_chunk( + name="demo-chunk" + ) self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) def test_batch_delete_chunks(self): - demo_corpus = retriever.create_corpus(name="demo_corpus") - demo_document = demo_corpus.create_document(name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="demo_chunk", + name="demo-chunk", data="This is another demo chunk.", ) delete_request = demo_document.batch_delete_chunks(chunks=[x.name, y.name]) From 25101ee9aaf38e895ba3f60f28717f41be020e7b Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 12:48:17 -0800 Subject: [PATCH 04/20] Updated regex to match proto --- google/generativeai/types/retriever_types.py | 2 +- tests/test_retriever.py | 65 +++-- 
tests/test_retriever_async.py | 253 ++++++++----------- 3 files changed, 143 insertions(+), 177 deletions(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index a466a683a..99cf86b0e 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -32,7 +32,7 @@ from google.generativeai.types.model_types import idecode_time from google.generativeai.utils import flatten_update_paths -_VALID_NAME = r"^[^-][a-z0-9\-]+[^-]$" +_VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])$" Operator = glm.Condition.Operator State = glm.Chunk.State diff --git a/tests/test_retriever.py b/tests/test_retriever.py index e9c38e5d6..3e1b320a4 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -69,8 +69,8 @@ def update_corpus( ) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo-corpus_1", + name="corpora/demo-corpus", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -82,14 +82,14 @@ def list_corpora( self.observed_requests.append(request) return [ glm.Corpus( - name="corpora/demo_corpus_1", - display_name="demo-corpus_1", + name="corpora/demo_corpus-1", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Corpus( - name="corpora/demo_corpus_2", - display_name="demo-corpus_2", + name="corpora/demo-corpus-2", + display_name="demo-corpus-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), @@ -105,7 +105,7 @@ def query_corpus( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -126,7 +126,7 @@ def create_document( ) -> 
retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", + name="corpora/demo-corpus/documents/demo-doc", display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -138,7 +138,7 @@ def get_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", + name="corpora/demo-corpus/documents/demo_doc", display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -150,7 +150,7 @@ def update_document( ) -> glm.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", + name="corpora/demo-corpus/documents/demo_doc", display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -163,13 +163,13 @@ def list_documents( self.observed_requests.append(request) return [ glm.Document( - name="corpora/demo_corpus/documents/demo_doc_1", + name="corpora/demo-corpus/documents/demo_doc_1", display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Document( - name="corpora/demo_corpus/documents/demo_doc_2", + name="corpora/demo-corpus/documents/demo_doc_2", display_name="demo-doc-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -223,13 +223,13 @@ def batch_create_chunks( return glm.BatchCreateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc", + name="corpora/demo-corpus/documents/demo-doc/chunks/dc", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc1", + 
name="corpora/demo-corpus/documents/demo-doc/chunks/dc1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -243,7 +243,7 @@ def get_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -256,13 +256,13 @@ def list_chunks( self.observed_requests.append(request) return [ glm.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -273,7 +273,7 @@ def list_chunks( def update_chunk(request: glm.UpdateChunkRequest) -> glm.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -287,13 +287,13 @@ def batch_update_chunks( return glm.BatchUpdateChunksResponse( chunks=[ glm.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + 
name="corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -348,7 +348,7 @@ def test_query_corpus(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="demo-chunk", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -387,7 +387,7 @@ def test_delete_document(self): demo_corpus = retriever.create_corpus(name="demo-corpus") demo_document = demo_corpus.create_document(name="demo-doc") demo_doc2 = demo_corpus.create_document(name="demo-doc-2") - delete_request = demo_corpus.delete_document(name="corpora/demo_corpus/documents/demo_doc") + delete_request = demo_corpus.delete_document(name="corpora/demo-corpus/documents/demo_doc") self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) def test_list_documents(self): @@ -432,18 +432,17 @@ def test_create_chunk(self): self.assertEqual("demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - @parameterized.named_parameters( [ dict( testcase_name="dictionaries", chunks=[ { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "name": "corpora/demo-corpus/documents/demo-doc/chunks/dc", "data": "This is a demo chunk.", }, { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "name": "corpora/demo-corpus/documents/demo-doc/chunks/dc1", "data": "This is another demo chunk.", }, ], @@ -452,11 +451,11 @@ def test_create_chunk(self): testcase_name="tuples", chunks=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "corpora/demo-corpus/documents/demo-doc/chunks/dc", "This is a demo chunk.", ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "corpora/demo-corpus/documents/demo-doc/chunks/dc1", "This is another demo chunk.", ), ], @@ -515,10 +514,10 
@@ def test_update_chunk(self): dict( testcase_name="dictionary_of_updates", updates={ - "corpora/demo_corpus/documents/demo_doc/chunks/demo-chunk": { + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -527,11 +526,11 @@ def test_update_chunk(self): testcase_name="list_of_tuples", updates=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo-chunk", + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + "corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", {"data": {"string_value": "This is another updated chunk."}}, ), ], @@ -565,9 +564,7 @@ def test_delete_chunk(self): name="demo-chunk", data="This is a demo chunk.", ) - delete_request = demo_document.delete_chunk( - name="demo-chunk" - ) + delete_request = demo_document.delete_chunk(name="demo-chunk") self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) def test_batch_delete_chunks(self): diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 2ecccb440..c721f9994 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -47,8 +47,8 @@ async def create_corpus( ) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus", + name="corpora/demo-corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -59,8 +59,8 @@ async def get_corpus( ) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus", + 
name="corpora/demo-corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -69,8 +69,8 @@ async def get_corpus( async def update_corpus(request: glm.UpdateCorpusRequest) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus_1", + name="corpora/demo-corpus", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -81,14 +81,14 @@ async def list_corpora(request: glm.ListCorporaRequest) -> glm.ListCorporaRespon async def results(): yield glm.Corpus( - name="corpora/demo_corpus_1", - display_name="demo_corpus_1", + name="corpora/demo-corpus-1", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Corpus( - name="corpora/demo_corpus_2", - display_name="demo_corpus_2", + name="corpora/demo-corpus_2", + display_name="demo-corpus-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -105,7 +105,7 @@ async def query_corpus( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -126,8 +126,8 @@ async def create_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -138,8 +138,8 @@ async def get_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - 
name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -150,8 +150,8 @@ async def update_document( ) -> glm.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -164,14 +164,14 @@ async def list_documents( async def results(): yield glm.Document( - name="corpora/demo_corpus/documents/demo_doc_1", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/dem-doc_1", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Document( - name="corpora/demo_corpus/documents/demo_doc_2", - display_name="demo_doc_2", + name="corpora/demo-corpus/documents/dem-doc_2", + display_name="demo-doc_2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -194,7 +194,7 @@ async def query_document( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -211,7 +211,7 @@ async def create_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -225,13 +225,13 @@ async def batch_create_chunks( return 
glm.BatchCreateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc", + name="corpora/demo-corpus/documents/dem-doc/chunks/dc", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc1", + name="corpora/demo-corpus/documents/dem-doc/chunks/dc1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -245,7 +245,7 @@ async def get_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -259,13 +259,13 @@ async def list_chunks( async def results(): yield glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -277,7 +277,7 @@ async def results(): async def update_chunk(request: glm.UpdateChunkRequest) -> glm.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated demo chunk."}, 
create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -291,13 +291,13 @@ async def batch_update_chunks( return glm.BatchUpdateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -317,32 +317,19 @@ async def batch_delete_chunks( ) -> None: self.observed_requests.append(request) - async def test_create_corpus(self, display_name="demo_corpus"): - x = await retriever.create_corpus_async(display_name=display_name) + async def test_create_corpus(self, name="demo-corpus"): + x = await retriever.create_corpus_async(name=name) self.assertIsInstance(x, retriever_service.Corpus) - self.assertEqual("demo_corpus", x.display_name) - self.assertEqual("corpora/demo_corpus", x.name) + self.assertEqual("demo-corpus", x.display_name) + self.assertEqual("corpora/demo-corpus", x.name) - @parameterized.named_parameters( - [ - dict(testcase_name="match_corpora_regex", name="corpora/demo_corpus"), - dict(testcase_name="no_corpora", name="demo_corpus"), - dict(testcase_name="with_punctuation", name="corpora/demo_corpus*(*)"), - dict(testcase_name="dash_at_start", name="-demo_corpus"), - ] - ) - async def test_create_corpus_names(self, name): + async def test_get_corpus(self, name="demo-corpus"): x = await retriever.create_corpus_async(name=name) - self.assertEqual("demo_corpus", x.display_name) - self.assertEqual("corpora/demo_corpus", x.name) - - async def test_get_corpus(self, display_name="demo_corpus"): - x = 
await retriever.create_corpus_async(display_name=display_name) c = await retriever.get_corpus_async(name=x.name) - self.assertEqual("demo_corpus", c.display_name) + self.assertEqual("demo-corpus", c.display_name) async def test_update_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") update_request = await demo_corpus.update_async(updates={"display_name": "demo_corpus_1"}) self.assertEqual("demo_corpus_1", demo_corpus.display_name) @@ -354,10 +341,10 @@ async def test_list_corpora(self): self.assertEqual(len(result), 2) async def test_query_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") demo_chunk = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = await demo_corpus.query_async(query="What kind of chunk is this?") @@ -367,7 +354,7 @@ async def test_query_corpus(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -379,67 +366,49 @@ async def test_query_corpus(self): ) async def test_delete_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - delete_request = await retriever.delete_corpus_async(name="corpora/demo_corpus", force=True) + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + 
demo_document = await demo_corpus.create_document_async(name="demo-doc") + delete_request = await retriever.delete_corpus_async(name="corpora/demo-corpus", force=True) self.assertIsInstance(self.observed_requests[-1], glm.DeleteCorpusRequest) - async def test_create_document(self, display_name="demo_doc"): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(display_name=display_name) + async def test_create_document(self, display_name="demo-doc"): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + x = await demo_corpus.create_document_async(name=display_name) self.assertIsInstance(x, retriever_service.Document) - self.assertEqual("demo_doc", x.display_name) + self.assertEqual("demo-doc", x.display_name) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_document_regex", name="corpora/demo_corpus/documents/demo_doc" - ), - dict(testcase_name="no_document", name="corpora/demo_corpus/demo_document"), - dict( - testcase_name="with_punctuation", name="corpora/demo_corpus*(*)/documents/demo_doc" - ), - dict(testcase_name="dash_at_start", name="-demo_doc"), - ] - ) - async def test_create_document_name(self, name): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(name=name) - self.assertEqual("corpora/demo_corpus/documents/demo_doc", x.name) - self.assertEqual("demo_doc", x.display_name) - - async def test_get_document(self, display_name="demo_doc"): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(display_name=display_name) + async def test_get_document(self, display_name="demo-doc"): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + x = await demo_corpus.create_document_async(name=display_name) d = await demo_corpus.get_document_async(name=x.name) - self.assertEqual("demo_doc", 
d.display_name) + self.assertEqual("demo-doc", d.display_name) async def test_update_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - update_request = await demo_document.update_async(updates={"display_name": "demo_doc_1"}) - self.assertEqual("demo_doc_1", demo_document.display_name) + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + update_request = await demo_document.update_async(updates={"display_name": "demo-doc-1"}) + self.assertEqual("demo-doc-1", demo_document.display_name) async def test_delete_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - demo_doc2 = await demo_corpus.create_document_async(display_name="demo_doc_2") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + demo_doc2 = await demo_corpus.create_document_async(name="demo-doc-2") delete_request = await demo_corpus.delete_document_async( - name="corpora/demo_corpus/documents/demo_doc" + name="corpora/demo-corpus/documents/demo-doc" ) self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) async def test_list_documents(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - demo_doc2 = await demo_corpus.create_document_async(display_name="demo_doc_2") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + demo_doc2 = await demo_corpus.create_document_async(name="demo-doc_2") 
self.assertLen(list(demo_corpus.list_documents()), 2) async def test_query_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") demo_chunk = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) q = await demo_document.query_async(query="What kind of chunk is this?") @@ -449,7 +418,7 @@ async def test_query_document(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -461,38 +430,38 @@ async def test_query_document(self): ) async def test_create_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) @parameterized.named_parameters( [ dict( 
testcase_name="match_chunk_regex", - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", ), - dict(testcase_name="no_chunk", name="corpora/demo_corpus/demo_document/demo_chunk"), + dict(testcase_name="no_chunk", name="corpora/demo-corpus/dem-document/demo-chunk"), dict( testcase_name="with_punctuation", - name="corpora/demo_corpus*(*)/documents/demo_doc/chunks*****/demo_chunk", + name="corpora/demo-corpus*(*)/documents/dem-doc/chunks*****/demo-chunk", ), - dict(testcase_name="dash_at_start", name="-demo_chunk"), + dict(testcase_name="dash_at_start", name="-demo-chunk"), ] ) async def test_create_chunk_name(self, name): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( name=name, data="This is a demo chunk.", ) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) @parameterized.named_parameters( [ @@ -500,11 +469,11 @@ async def test_create_chunk_name(self, name): testcase_name="dictionaries", chunks=[ { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "name": "corpora/demo-corpus/documents/dem-doc/chunks/dc", "data": "This is a demo chunk.", }, { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "name": "corpora/demo-corpus/documents/dem-doc/chunks/dc1", "data": "This is another demo chunk.", }, ], @@ -513,11 +482,11 @@ async def test_create_chunk_name(self, name): testcase_name="tuples", chunks=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "corpora/demo-corpus/documents/dem-doc/chunks/dc", "This is a demo chunk.", ), ( - 
"corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "corpora/demo-corpus/documents/dem-doc/chunks/dc1", "This is another demo chunk.", ), ], @@ -525,32 +494,32 @@ async def test_create_chunk_name(self, name): ] ) async def test_batch_create_chunks(self, chunks): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") chunks = await demo_document.batch_create_chunks_async(chunks=chunks) self.assertIsInstance(self.observed_requests[-1], glm.BatchCreateChunksRequest) self.assertEqual("This is a demo chunk.", chunks[0].data.string_value) self.assertEqual("This is another demo chunk.", chunks[1].data.string_value) async def test_get_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) ch = await demo_document.get_chunk_async(name=x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), ch.data) async def test_list_chunks(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - 
name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", data="This is another demo chunk.", ) chunks = [] @@ -560,10 +529,10 @@ async def test_list_chunks(self): self.assertLen(chunks, 2) async def test_update_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) await x.update_async(updates={"data": {"string_value": "This is an updated demo chunk."}}) @@ -577,10 +546,10 @@ async def test_update_chunk(self): dict( testcase_name="dictionary_of_updates", updates={ - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk": { + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -589,11 +558,11 @@ async def test_update_chunk(self): testcase_name="list_of_tuples", updates=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + 
"corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", {"data": {"string_value": "This is another updated chunk."}}, ), ], @@ -601,14 +570,14 @@ async def test_update_chunk(self): ], ) async def test_batch_update_chunks_data_structures(self, updates): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", data="This is another demo chunk.", ) update_request = await demo_document.batch_update_chunks_async(chunks=updates) @@ -621,26 +590,26 @@ async def test_batch_update_chunks_data_structures(self, updates): ) async def test_delete_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) delete_request = await demo_document.delete_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk" + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk" ) self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) async def 
test_batch_delete_chunks(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is another demo chunk.", ) delete_request = await demo_document.batch_delete_chunks_async(chunks=[x.name, y.name]) From ca853a9f21220a8d6b6dd2b958de159cad134788 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 12:58:36 -0800 Subject: [PATCH 05/20] Fixing missing await keywords --- google/generativeai/retriever.py | 2 +- tests/test_retriever.py | 8 +++---- tests/test_retriever_async.py | 37 ++++++-------------------------- 3 files changed, 12 insertions(+), 35 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 177ab24de..d12a5e73c 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -99,7 +99,7 @@ async def create_corpus_async( ) request = glm.CreateCorpusRequest(corpus=corpus) - response = client.create_corpus(request) + response = await client.create_corpus(request) response = type(response).to_dict(response) idecode_time(response, "create_time") idecode_time(response, "update_time") diff --git a/tests/test_retriever.py b/tests/test_retriever.py index 3e1b320a4..24265d84e 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -262,7 +262,7 @@ def list_chunks( update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - 
name="corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -293,7 +293,7 @@ def batch_update_chunks( update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -517,7 +517,7 @@ def test_update_chunk(self): "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -530,7 +530,7 @@ def test_update_chunk(self): {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo-corpus/documents/demo-doc/chunks/demo_chunk_1", + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", {"data": {"string_value": "This is another updated chunk."}}, ), ], diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index c721f9994..8c4023b35 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -265,7 +265,7 @@ async def results(): update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Chunk( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -297,7 +297,7 @@ async def batch_update_chunks( update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( 
- name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -401,7 +401,7 @@ async def test_delete_document(self): async def test_list_documents(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") - demo_doc2 = await demo_corpus.create_document_async(name="demo-doc_2") + demo_doc2 = await demo_corpus.create_document_async(name="demo-doc-2") self.assertLen(list(demo_corpus.list_documents()), 2) async def test_query_document(self): @@ -440,29 +440,6 @@ async def test_create_chunk(self): self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_chunk_regex", - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", - ), - dict(testcase_name="no_chunk", name="corpora/demo-corpus/dem-document/demo-chunk"), - dict( - testcase_name="with_punctuation", - name="corpora/demo-corpus*(*)/documents/dem-doc/chunks*****/demo-chunk", - ), - dict(testcase_name="dash_at_start", name="-demo-chunk"), - ] - ) - async def test_create_chunk_name(self, name): - demo_corpus = await retriever.create_corpus_async(name="demo-corpus") - demo_document = await demo_corpus.create_document_async(name="demo-doc") - x = await demo_document.create_chunk_async( - name=name, - data="This is a demo chunk.", - ) - self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) - @parameterized.named_parameters( [ dict( @@ -519,7 +496,7 @@ async def test_list_chunks(self): data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - 
name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data="This is another demo chunk.", ) chunks = [] @@ -549,7 +526,7 @@ async def test_update_chunk(self): "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -562,7 +539,7 @@ async def test_update_chunk(self): {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", {"data": {"string_value": "This is another updated chunk."}}, ), ], @@ -577,7 +554,7 @@ async def test_batch_update_chunks_data_structures(self, updates): data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data="This is another demo chunk.", ) update_request = await demo_document.batch_update_chunks_async(chunks=updates) From 2da63eea56d028a25ed19f9344dc7f1379a26f44 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 13:09:21 -0800 Subject: [PATCH 06/20] Update async tests --- tests/test_retriever_async.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 8c4023b35..29d52e0a7 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -408,7 +408,7 @@ async def test_query_document(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") demo_chunk = await demo_document.create_chunk_async( - 
name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = await demo_document.query_async(query="What kind of chunk is this?") @@ -433,7 +433,7 @@ async def test_create_chunk(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) @@ -482,7 +482,7 @@ async def test_get_chunk(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) ch = await demo_document.get_chunk_async(name=x.name) @@ -492,11 +492,11 @@ async def test_list_chunks(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", + name="demo-chunk-1", data="This is another demo chunk.", ) chunks = [] @@ -509,7 +509,7 @@ async def test_update_chunk(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) await x.update_async(updates={"data": {"string_value": "This is an updated 
demo chunk."}}) @@ -550,11 +550,11 @@ async def test_batch_update_chunks_data_structures(self, updates): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", + name="demo-chunk-1", data="This is another demo chunk.", ) update_request = await demo_document.batch_update_chunks_async(chunks=updates) @@ -570,7 +570,7 @@ async def test_delete_chunk(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) delete_request = await demo_document.delete_chunk_async( @@ -582,11 +582,11 @@ async def test_batch_delete_chunks(self): demo_corpus = await retriever.create_corpus_async(name="demo-corpus") demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", + name="demo-chunk", data="This is another demo chunk.", ) delete_request = await demo_document.batch_delete_chunks_async(chunks=[x.name, y.name]) From 8d7cdf694a8ae2a8eab99fe30f17b329a4f664f5 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 13:25:41 -0800 Subject: [PATCH 07/20] Update error messages --- google/generativeai/retriever.py | 12 ++-------- google/generativeai/types/retriever_types.py | 24 
++++---------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index d12a5e73c..127cdff1b 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -60,11 +60,7 @@ def create_corpus( corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: raise ValueError( - """ - Enter an alphanumeric name which can contain dashes that is less than 40 characters, - but the name must not begin or end with a dash for the `Corpus`. The name - entered will be formatted as corpora/. - """ + " `name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Corpus`. The name entered will be formatted as corpora/." ) request = glm.CreateCorpusRequest(corpus=corpus) @@ -91,11 +87,7 @@ async def create_corpus_async( corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: raise ValueError( - """ - `Corpus` name must be formatted as corpora/. Enter a `corpus_name` that - contains alphanumeric characters and/or dashes, but the name must not begin or end - with a dash. The name entered for the `Corpus` must be less than 40 characters. - """ + " `name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Corpus`. The name entered will be formatted as corpora/." 
) request = glm.CreateCorpusRequest(corpus=corpus) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 99cf86b0e..4389c37ed 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -206,11 +206,7 @@ def create_document( ) else: raise ValueError( - f""" - Document name must be formatted as {self.name}/documents/. Enter a - `document_name` that contains alphanumeric characters and/or dashes, but the name must not - begin or end with a dash. The name entered for the `Document` must be less than 40 characters. - """ + f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Document`. The name entered will be formatted as {self.name}/documents/." ) request = glm.CreateDocumentRequest(parent=self.name, document=document) @@ -239,11 +235,7 @@ async def create_document_async( ) else: raise ValueError( - f""" - Document name must be formatted as {self.name}/documents/. Enter a - `document_name` that contains alphanumeric characters and/or dashes, but the name must not - begin or end with a dash. The name entered for the `Document` must be less than 40 characters. - """ + f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Document`. The name entered will be formatted as {self.name}/documents/." ) request = glm.CreateDocumentRequest(parent=self.name, document=document) @@ -546,11 +538,7 @@ def create_chunk( chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f""" - Chunk name must be formatted as {self.name}/chunks/. 
Enter a `chunk_name` that - contains alphanumeric characters and/or dashes, but the name must not begin or end with a dash. - The name entered for the `Chunk` must be less than 40 characters. - """ + f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Chunk`. The name entered will be formatted as {self.name}/chunk/." ) if isinstance(data, str): @@ -584,11 +572,7 @@ async def create_chunk_async( chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f""" - Chunk name must be formatted as {self.name}/chunks/. Enter a `chunk_name` that - contains alphanumeric characters and/or dashes, but the name must not begin or end with a dash. - The name entered for the `Chunk` must be less than 40 characters. - """ + f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Chunk`. The name entered will be formatted as {self.name}/chunk/." 
) if isinstance(data, str): From c8a75308ce8098f5a9fa6d6b1a8e0cb522441889 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 13:28:21 -0800 Subject: [PATCH 08/20] Fixed async test case for create_document --- google/generativeai/types/retriever_types.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 4389c37ed..2d504aad3 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -224,9 +224,6 @@ async def create_document_async( if client is None: client = get_default_retriever_async_client() - if not name and not display_name: - raise ValueError("Either the document name or display name must be specified.") - document = None if re.match(_VALID_NAME, name) and len(name) < 40: document_name = f"{self.name}/documents/{name}" From 1c590c8726327a14b489a58465e2a6a07db2652f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 5 Feb 2024 14:02:57 -0800 Subject: [PATCH 09/20] Add a valid_name function. 
--- google/generativeai/retriever.py | 6 +++--- google/generativeai/types/retriever_types.py | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 127cdff1b..ebf271367 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -25,7 +25,7 @@ from google.generativeai.client import get_default_retriever_async_client from google.generativeai.types import retriever_types from google.generativeai.types.model_types import idecode_time -from google.generativeai.types.retriever_types import _VALID_NAME +from google.generativeai.types import retriever_types def create_corpus( @@ -55,7 +55,7 @@ def create_corpus( client = get_default_retriever_client() corpus = None - if re.match(_VALID_NAME, name) and len(name) < 40: + if retriever_types.valid_name(name): corpus_name = "corpora/" + name # Construct the name corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: @@ -82,7 +82,7 @@ async def create_corpus_async( client = get_default_retriever_async_client() corpus = None - if re.match(_VALID_NAME, name) and len(name) < 40: + if retriever_types.valid_name(name): corpus_name = "corpora/" + name # Construct the name corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 2d504aad3..1b9f2b784 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -34,6 +34,9 @@ _VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])$" +def valid_name(name) + return re.match(_VALID_NAME, name) and len(name) < 40 + Operator = glm.Condition.Operator State = glm.Chunk.State @@ -199,7 +202,7 @@ def create_document( client = get_default_retriever_client() document = None - if re.match(_VALID_NAME, name) and len(name) < 40: + if valid_name(name): document_name = 
f"{self.name}/documents/{name}" document = glm.Document( name=document_name, display_name=display_name, custom_metadata=custom_metadata @@ -225,7 +228,7 @@ async def create_document_async( client = get_default_retriever_async_client() document = None - if re.match(_VALID_NAME, name) and len(name) < 40: + if valid_name(name): document_name = f"{self.name}/documents/{name}" document = glm.Document( name=document_name, display_name=display_name, custom_metadata=custom_metadata @@ -531,7 +534,7 @@ def create_chunk( client = get_default_retriever_client() chunk_name, chunk = "", None - if re.match(_VALID_NAME, name) and len(name) < 40: + if valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( @@ -565,7 +568,7 @@ async def create_chunk_async( client = get_default_retriever_async_client() chunk_name, chunk = "", None - if re.match(_VALID_NAME, name) and len(name) < 40: + if valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( From 66d6aa9f371c57447d734fdf7047dc81385a80b4 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 14:12:46 -0800 Subject: [PATCH 10/20] Added missing colon to valid_name function --- google/generativeai/types/retriever_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 1b9f2b784..aace124dc 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -34,7 +34,7 @@ _VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])$" -def valid_name(name) +def valid_name(name): return re.match(_VALID_NAME, name) and len(name) < 40 Operator = glm.Condition.Operator From da1eafaea3e9e19af0d4f8c663cb3f35d9dddfb6 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 14:21:33 -0800 Subject: [PATCH 11/20] Removed duplicate import, updated error message --- google/generativeai/retriever.py | 5 ++--- 
google/generativeai/types/retriever_types.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index ebf271367..de8386642 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -23,7 +23,6 @@ from google.generativeai.client import get_default_retriever_client from google.generativeai.client import get_default_retriever_async_client -from google.generativeai.types import retriever_types from google.generativeai.types.model_types import idecode_time from google.generativeai.types import retriever_types @@ -60,7 +59,7 @@ def create_corpus( corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: raise ValueError( - " `name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Corpus`. The name entered will be formatted as corpora/." + f"{retriever_types._NAME_ERROR_MESSAGE}`Corpus`. The name entered will be formatted as corpora/." ) request = glm.CreateCorpusRequest(corpus=corpus) @@ -87,7 +86,7 @@ async def create_corpus_async( corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: raise ValueError( - " `name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Corpus`. The name entered will be formatted as corpora/." + f"{retriever_types._NAME_ERROR_MESSAGE}`Corpus`. The name entered will be formatted as corpora/." 
) request = glm.CreateCorpusRequest(corpus=corpus) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index aace124dc..3e2ee25a7 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -33,9 +33,12 @@ from google.generativeai.utils import flatten_update_paths _VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])$" +_NAME_ERROR_MESSAGE = "`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the " + def valid_name(name): - return re.match(_VALID_NAME, name) and len(name) < 40 + return re.match(_VALID_NAME, name) and len(name) < 40 + Operator = glm.Condition.Operator State = glm.Chunk.State @@ -209,7 +212,7 @@ def create_document( ) else: raise ValueError( - f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Document`. The name entered will be formatted as {self.name}/documents/." + f"{_NAME_ERROR_MESSAGE}`Document`. The name entered will be formatted as {self.name}/documents/." ) request = glm.CreateDocumentRequest(parent=self.name, document=document) @@ -235,7 +238,7 @@ async def create_document_async( ) else: raise ValueError( - f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Document`. The name entered will be formatted as {self.name}/documents/." + f"{_NAME_ERROR_MESSAGE}`Document`. The name entered will be formatted as {self.name}/documents/." 
) request = glm.CreateDocumentRequest(parent=self.name, document=document) @@ -538,7 +541,7 @@ def create_chunk( chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Chunk`. The name entered will be formatted as {self.name}/chunk/." + f"{_NAME_ERROR_MESSAGE}`Chunk`. The name entered will be formatted as {self.name}/chunk/." ) if isinstance(data, str): @@ -572,7 +575,7 @@ async def create_chunk_async( chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f"`name` parameter contains non-alphanumeric characters or is longer than 40 characters. Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the `Chunk`. The name entered will be formatted as {self.name}/chunk/." + f"{_NAME_ERROR_MESSAGE}`Chunk`. The name entered will be formatted as {self.name}/chunk/." 
) if isinstance(data, str): From 49062c4fe02830df0b49e785bb5df5404b61f8a0 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 14:53:06 -0800 Subject: [PATCH 12/20] Added bool = Force to delete functions --- google/generativeai/retriever.py | 4 ++-- google/generativeai/types/retriever_types.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index de8386642..52317d87b 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -134,7 +134,7 @@ async def get_corpus_async(name: str, client: glm.RetrieverServiceAsyncClient | return response -def delete_corpus(name: str, force: bool, client: glm.RetrieverServiceClient | None = None): # fmt: skip +def delete_corpus(name: str, force: bool = False, client: glm.RetrieverServiceClient | None = None): # fmt: skip """ Delete a `Corpus` from the service. @@ -149,7 +149,7 @@ def delete_corpus(name: str, force: bool, client: glm.RetrieverServiceClient | N client.delete_corpus(request) -async def delete_corpus_async(name: str, force: bool, client: glm.RetrieverServiceAsyncClient | None = None): # fmt: skip +async def delete_corpus_async(name: str, force: bool = False, client: glm.RetrieverServiceAsyncClient | None = None): # fmt: skip """This is the async version of `retriever.delete_corpus`.""" if client is None: client = get_default_retriever_async_client() diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 3e2ee25a7..ed0681755 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -416,7 +416,7 @@ async def query_async( def delete_document( self, name: str, - force: Optional[bool] = None, + force: bool = False, client: glm.RetrieverServiceClient | None = None, ): """ @@ -435,7 +435,7 @@ def delete_document( async def delete_document_async( self, name: str, - force: Optional[bool] 
= None, + force: bool = False, client: glm.RetrieverServiceAsyncClient | None = None, ): """This is the async version of `Corpus.delete_document`.""" From df4b0b6ceb5820c63c0ffb37bd80180a6d44f1c4 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 14:56:02 -0800 Subject: [PATCH 13/20] Optional string for chunk name --- google/generativeai/types/retriever_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index ed0681755..d69345bf0 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -513,7 +513,7 @@ class Document(abc.ABC): def create_chunk( self, - name: str, + name: str | None, data: str | ChunkData, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceClient | None = None, @@ -561,7 +561,7 @@ def create_chunk( async def create_chunk_async( self, - name: str, + name: str | None, data: str | ChunkData, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, From 62f684c1a08b427dcf0e10af73af8aa0d76fad5d Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 14:59:56 -0800 Subject: [PATCH 14/20] Change to optional param for chunk name --- google/generativeai/types/retriever_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index d69345bf0..79b587829 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -513,8 +513,8 @@ class Document(abc.ABC): def create_chunk( self, - name: str | None, data: str | ChunkData, + name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceClient | None = None, ) -> Chunk: @@ -561,8 +561,8 @@ def create_chunk( async 
def create_chunk_async( self, - name: str | None, data: str | ChunkData, + name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, ) -> Chunk: From 9787e79106bafec096a9f3b04bc365104886561e Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:08:44 -0800 Subject: [PATCH 15/20] Add option for empty name for chunk and test cases --- google/generativeai/types/retriever_types.py | 18 +++++++++++------- tests/test_retriever.py | 10 ++++++++++ tests/test_retriever_async.py | 10 ++++++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 79b587829..2d95e17f0 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -522,8 +522,8 @@ def create_chunk( Create a `Chunk` object which has textual data. Args: - name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-). data: The content for the `Chunk`, such as the text string. + name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-). custom_metadata: User provided custom metadata stored as key-value pairs. state: States for the lifecycle of a `Chunk`. @@ -536,12 +536,14 @@ def create_chunk( if client is None: client = get_default_retriever_client() - chunk_name, chunk = "", None - if valid_name(name): + chunk_name, chunk = None, None + if name is None: + chunk_name = None + elif valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Chunk`. The name entered will be formatted as {self.name}/chunk/." + f"{_NAME_ERROR_MESSAGE}`Chunk`. An empty name can also be passed in. 
The name entered will be formatted as {self.name}/chunk/." ) if isinstance(data, str): @@ -570,12 +572,14 @@ async def create_chunk_async( if client is None: client = get_default_retriever_async_client() - chunk_name, chunk = "", None - if valid_name(name): + chunk_name, chunk = None, None + if name is None: + chunk_name = None + elif valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Chunk`. The name entered will be formatted as {self.name}/chunk/." + f"{_NAME_ERROR_MESSAGE}`Chunk`. An empty name can also be passed in. The name entered will be formatted as {self.name}/chunk/." ) if isinstance(data, str): diff --git a/tests/test_retriever.py b/tests/test_retriever.py index 24265d84e..35e6c74db 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -432,6 +432,16 @@ def test_create_chunk(self): self.assertEqual("demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + def test_create_chunk_empty(self): + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + x = demo_document.create_chunk( + data="This is a demo chunk.", + ) + self.assertIsInstance(x, retriever_service.Chunk) + self.assertEqual("demo-chunk", x.name) + self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + @parameterized.named_parameters( [ dict( diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 29d52e0a7..8b3f1c257 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -440,6 +440,16 @@ async def test_create_chunk(self): self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + async def test_create_chunk_empty(self): + demo_corpus = await retriever.create_corpus(name="demo-corpus") + demo_document = await 
demo_corpus.create_document(name="demo-doc") + x = await demo_document.create_chunk( + data="This is a demo chunk.", + ) + self.assertIsInstance(x, retriever_service.Chunk) + self.assertEqual("demo-chunk", x.name) + self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + @parameterized.named_parameters( [ dict( From 9a33c6d62fbe65be205f90739eb04c463c10bb16 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:10:18 -0800 Subject: [PATCH 16/20] add _async suffix to test case --- tests/test_retriever_async.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 8b3f1c257..be78c5576 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -441,9 +441,9 @@ async def test_create_chunk(self): self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) async def test_create_chunk_empty(self): - demo_corpus = await retriever.create_corpus(name="demo-corpus") - demo_document = await demo_corpus.create_document(name="demo-doc") - x = await demo_document.create_chunk( + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + x = await demo_document.create_chunk_async( data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) From bd93a7e3dfa7486b5ba3089b580fd418e85195cd Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:18:23 -0800 Subject: [PATCH 17/20] test async for empty chunk name --- tests/test_retriever_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index be78c5576..1e6974675 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -437,7 +437,7 @@ async def test_create_chunk(self): data="This is a demo chunk.", ) self.assertIsInstance(x, 
retriever_service.Chunk) - self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) + self.assertEqual("demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) async def test_create_chunk_empty(self): From ed95b1ce59f4c256c88f6ecc0641066c506e8da8 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:20:32 -0800 Subject: [PATCH 18/20] Pass type checking --- tests/test_retriever_async.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 1e6974675..0fd35f4ac 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -440,16 +440,6 @@ async def test_create_chunk(self): self.assertEqual("demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - async def test_create_chunk_empty(self): - demo_corpus = await retriever.create_corpus_async(name="demo-corpus") - demo_document = await demo_corpus.create_document_async(name="demo-doc") - x = await demo_document.create_chunk_async( - data="This is a demo chunk.", - ) - self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("demo-chunk", x.name) - self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - @parameterized.named_parameters( [ dict( From c3c2b10ad9b66befd92629bcda5268df15fc2a0a Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:38:21 -0800 Subject: [PATCH 19/20] Fixed test cases --- tests/test_retriever.py | 6 +++--- tests/test_retriever_async.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_retriever.py b/tests/test_retriever.py index 35e6c74db..2a0071095 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -209,7 +209,7 @@ def create_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="demo-chunk", + 
name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -429,7 +429,7 @@ def test_create_chunk(self): data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("demo-chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) def test_create_chunk_empty(self): @@ -439,7 +439,7 @@ def test_create_chunk_empty(self): data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("demo-chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) @parameterized.named_parameters( diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 0fd35f4ac..6512e268f 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -437,7 +437,17 @@ async def test_create_chunk(self): data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("demo-chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) + self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) + + async def test_create_chunk_empty(self): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + x = await demo_document.create_chunk_async( + data="This is a demo chunk.", + ) + self.assertIsInstance(x, retriever_service.Chunk) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) @parameterized.named_parameters( 
From 8d33a2265054fdd3485527ad50c68cb5618787f4 Mon Sep 17 00:00:00 2001 From: Shilpa Kancharla Date: Mon, 5 Feb 2024 15:43:11 -0800 Subject: [PATCH 20/20] updated naming error msg --- google/generativeai/retriever.py | 8 ++------ google/generativeai/types/retriever_types.py | 21 ++++++++------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index 52317d87b..f6ea84748 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -58,9 +58,7 @@ def create_corpus( corpus_name = "corpora/" + name # Construct the name corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: - raise ValueError( - f"{retriever_types._NAME_ERROR_MESSAGE}`Corpus`. The name entered will be formatted as corpora/." - ) + raise ValueError(retriever_types.NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateCorpusRequest(corpus=corpus) response = client.create_corpus(request) @@ -85,9 +83,7 @@ async def create_corpus_async( corpus_name = "corpora/" + name # Construct the name corpus = glm.Corpus(name=corpus_name, display_name=display_name) else: - raise ValueError( - f"{retriever_types._NAME_ERROR_MESSAGE}`Corpus`. The name entered will be formatted as corpora/." - ) + raise ValueError(retriever_types.NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateCorpusRequest(corpus=corpus) response = await client.create_corpus(request) diff --git a/google/generativeai/types/retriever_types.py b/google/generativeai/types/retriever_types.py index 2d95e17f0..f8e210459 100644 --- a/google/generativeai/types/retriever_types.py +++ b/google/generativeai/types/retriever_types.py @@ -33,7 +33,10 @@ from google.generativeai.utils import flatten_update_paths _VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])$" -_NAME_ERROR_MESSAGE = "`name` parameter contains non-alphanumeric characters or is longer than 40 characters. 
Enter an alphanumeric name which can contain dashes that is less than 40 characters, but the name must not begin or end with a dash for the " +NAME_ERROR_MSG = """The `name` must consist of alphanumeric characters (or -) and be 40 or fewer characters. The name you entered: +\tlen(name)== {length} +\tname={name} +""" def valid_name(name): @@ -211,9 +214,7 @@ def create_document( name=document_name, display_name=display_name, custom_metadata=custom_metadata ) else: - raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Document`. The name entered will be formatted as {self.name}/documents/." - ) + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = client.create_document(request) @@ -237,9 +238,7 @@ async def create_document_async( name=document_name, display_name=display_name, custom_metadata=custom_metadata ) else: - raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Document`. The name entered will be formatted as {self.name}/documents/." - ) + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = await client.create_document(request) @@ -542,9 +541,7 @@ def create_chunk( elif valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: - raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Chunk`. An empty name can also be passed in. The name entered will be formatted as {self.name}/chunk/." - ) + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) if isinstance(data, str): chunk = glm.Chunk( @@ -578,9 +575,7 @@ async def create_chunk_async( elif valid_name(name): chunk_name = f"{self.name}/chunks/{name}" else: - raise ValueError( - f"{_NAME_ERROR_MESSAGE}`Chunk`. An empty name can also be passed in. The name entered will be formatted as {self.name}/chunk/." 
- ) + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) if isinstance(data, str): chunk = glm.Chunk(