diff --git a/google/generativeai/retriever.py b/google/generativeai/retriever.py index e6666a819..f6ea84748 100644 --- a/google/generativeai/retriever.py +++ b/google/generativeai/retriever.py @@ -23,17 +23,12 @@ from google.generativeai.client import get_default_retriever_client from google.generativeai.client import get_default_retriever_async_client -from google.generativeai.types import retriever_types from google.generativeai.types.model_types import idecode_time - -_CORPORA_NAME_REGEX = re.compile(r"^corpora/[a-z0-9-]+") -_REMOVE = string.punctuation -_REMOVE = _REMOVE.replace("-", "") # Don't remove hyphens -_PATTERN = r"[{}]".format(_REMOVE) # Create the pattern +from google.generativeai.types import retriever_types def create_corpus( - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, client: glm.RetrieverServiceClient | None = None, ) -> retriever_types.Corpus: @@ -58,18 +53,12 @@ def create_corpus( if client is None: client = get_default_retriever_client() - if not name and not display_name: - raise ValueError("Either the corpus name or display name must be specified.") - corpus = None - if name: - if re.match(_CORPORA_NAME_REGEX, name): - corpus = glm.Corpus(name=name, display_name=display_name) - elif "corpora/" not in name: - corpus_name = "corpora/" + re.sub(_PATTERN, "", name) - corpus = glm.Corpus(name=corpus_name, display_name=display_name) - else: - raise ValueError("Corpus name must be formatted as corpora/.") + if retriever_types.valid_name(name): + corpus_name = "corpora/" + name # Construct the name + corpus = glm.Corpus(name=corpus_name, display_name=display_name) + else: + raise ValueError(retriever_types.NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateCorpusRequest(corpus=corpus) response = client.create_corpus(request) @@ -81,7 +70,7 @@ def create_corpus( async def create_corpus_async( - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, client: 
glm.RetrieverServiceAsyncClient | None = None, ) -> retriever_types.Corpus: @@ -89,18 +78,12 @@ async def create_corpus_async( if client is None: client = get_default_retriever_async_client() - if not name and not display_name: - raise ValueError("Either the corpus name or display name must be specified.") - corpus = None - if name: - if re.match(_CORPORA_NAME_REGEX, name): - corpus = glm.Corpus(name=name, display_name=display_name) - elif "corpora/" not in name: - corpus_name = "corpora/" + re.sub(_PATTERN, "", name) - corpus = glm.Corpus(name=corpus_name, display_name=display_name) - else: - raise ValueError("Corpus name must be formatted as corpora/.") + if retriever_types.valid_name(name): + corpus_name = "corpora/" + name # Construct the name + corpus = glm.Corpus(name=corpus_name, display_name=display_name) + else: + raise ValueError(retriever_types.NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateCorpusRequest(corpus=corpus) response = await client.create_corpus(request) @@ -147,7 +130,7 @@ async def get_corpus_async(name: str, client: glm.RetrieverServiceAsyncClient | return response -def delete_corpus(name: str, force: bool, client: glm.RetrieverServiceClient | None = None): # fmt: skip +def delete_corpus(name: str, force: bool = False, client: glm.RetrieverServiceClient | None = None): # fmt: skip """ Delete a `Corpus` from the service. 
# Pattern for a resource ID (the part after "corpora/", "documents/" or
# "chunks/"): 1 to 40 characters drawn from lowercase letters, digits and
# dashes, where the first and last characters must not be a dash.
# The trailing group is optional so that one-character IDs are accepted.
_VALID_NAME = r"[a-z0-9]([a-z0-9-]{0,38}[a-z0-9])?$"

# Template for the ValueError raised on an invalid ID.
# Format it with `length=len(name)` and `name=name`.
NAME_ERROR_MSG = """The `name` must consist of alphanumeric characters (or -) and be 40 or fewer characters. The name you entered:
\tlen(name)== {length}
\tname={name}
"""


def valid_name(name: str) -> bool:
    """Return whether `name` is an acceptable resource ID.

    An ID is accepted when it is 1 to 40 characters long, contains only
    lowercase ASCII letters, digits and dashes, and neither starts nor ends
    with a dash.

    Args:
        name: The bare ID, without any "corpora/", "documents/" or "chunks/"
            prefix.

    Returns:
        `True` if `name` satisfies the rules above, `False` otherwise. Values
        that are not strings are reported as invalid instead of raising.
    """
    if not isinstance(name, str):
        return False
    # `fullmatch` (rather than `match` + `$`) rejects a trailing newline, and
    # the pattern itself caps the length at 40, so no separate length check
    # is needed.
    return re.fullmatch(_VALID_NAME, name) is not None
- ) + if valid_name(name): + document_name = f"{self.name}/documents/{name}" + document = glm.Document( + name=document_name, display_name=display_name, custom_metadata=custom_metadata + ) + else: + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = client.create_document(request) @@ -228,7 +222,7 @@ def create_document( async def create_document_async( self, - name: Optional[str] = None, + name: str, display_name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, @@ -237,24 +231,14 @@ async def create_document_async( if client is None: client = get_default_retriever_async_client() - if not name and not display_name: - raise ValueError("Either the document name or display name must be specified.") - document = None - if name: - if re.match(_DOCUMENT_NAME_REGEX, name): - document = glm.Document( - name=name, display_name=display_name, custom_metadata=custom_metadata - ) - elif f"corpora/{self.name}/documents/" not in name: - document_name = f"{self.name}/documents/" + re.sub(_PATTERN, "", name) - document = glm.Document( - name=document_name, display_name=display_name, custom_metadata=custom_metadata - ) - else: - raise ValueError( - f"Document name must be formatted as {self.name}/document/." 
- ) + if valid_name(name): + document_name = f"{self.name}/documents/{name}" + document = glm.Document( + name=document_name, display_name=display_name, custom_metadata=custom_metadata + ) + else: + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) request = glm.CreateDocumentRequest(parent=self.name, document=document) response = await client.create_document(request) @@ -431,7 +415,7 @@ async def query_async( def delete_document( self, name: str, - force: Optional[bool] = None, + force: bool = False, client: glm.RetrieverServiceClient | None = None, ): """ @@ -450,7 +434,7 @@ def delete_document( async def delete_document_async( self, name: str, - force: Optional[bool] = None, + force: bool = False, client: glm.RetrieverServiceAsyncClient | None = None, ): """This is the async version of `Corpus.delete_document`.""" @@ -528,8 +512,8 @@ class Document(abc.ABC): def create_chunk( self, - name: Optional[str], data: str | ChunkData, + name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceClient | None = None, ) -> Chunk: @@ -537,8 +521,8 @@ def create_chunk( Create a `Chunk` object which has textual data. Args: - name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-). data: The content for the `Chunk`, such as the text string. + name: The `Chunk` resource name. The ID (name excluding the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters that are lowercase alphanumeric or dashes (-). custom_metadata: User provided custom metadata stored as key-value pairs. state: States for the lifecycle of a `Chunk`. 
@@ -551,17 +535,13 @@ def create_chunk( if client is None: client = get_default_retriever_client() - chunk_name, chunk = "", None - if name: - if re.match(_CHUNK_NAME_REGEX, name): - chunk_name = name - - elif "chunks/" not in name: - chunk_name = f"{self.name}/chunks/" + re.sub(_PATTERN, "", name) - else: - raise ValueError( - f"Chunk name must be formatted as {self.name}/chunks/." - ) + chunk_name, chunk = None, None + if name is None: + chunk_name = None + elif valid_name(name): + chunk_name = f"{self.name}/chunks/{name}" + else: + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) if isinstance(data, str): chunk = glm.Chunk( @@ -580,8 +560,8 @@ def create_chunk( async def create_chunk_async( self, - name: Optional[str], data: str | ChunkData, + name: Optional[str] = None, custom_metadata: Optional[list[CustomMetadata]] = None, client: glm.RetrieverServiceAsyncClient | None = None, ) -> Chunk: @@ -589,17 +569,13 @@ async def create_chunk_async( if client is None: client = get_default_retriever_async_client() - chunk_name, chunk = "", None - if name: - if re.match(_CHUNK_NAME_REGEX, name): - chunk_name = name - - elif "chunks/" not in name: - chunk_name = f"{self.name}/chunks/" + re.sub(_PATTERN, "", name) - else: - raise ValueError( - f"Chunk name must be formatted as {self.name}/chunks/." 
- ) + chunk_name, chunk = None, None + if name is None: + chunk_name = None + elif valid_name(name): + chunk_name = f"{self.name}/chunks/{name}" + else: + raise ValueError(NAME_ERROR_MSG.format(length=len(name), name=name)) if isinstance(data, str): chunk = glm.Chunk( diff --git a/tests/test_retriever.py b/tests/test_retriever.py index a0f9e4395..2a0071095 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -46,7 +46,7 @@ def create_corpus( self.observed_requests.append(request) return glm.Corpus( name="corpora/demo_corpus", - display_name="demo_corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -58,7 +58,7 @@ def get_corpus( self.observed_requests.append(request) return glm.Corpus( name="corpora/demo_corpus", - display_name="demo_corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -69,8 +69,8 @@ def update_corpus( ) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus_1", + name="corpora/demo-corpus", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -82,14 +82,14 @@ def list_corpora( self.observed_requests.append(request) return [ glm.Corpus( - name="corpora/demo_corpus_1", - display_name="demo_corpus_1", + name="corpora/demo_corpus-1", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Corpus( - name="corpora/demo_corpus_2", - display_name="demo_corpus_2", + name="corpora/demo-corpus-2", + display_name="demo-corpus-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), @@ -105,7 +105,7 @@ def query_corpus( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - 
name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -126,8 +126,8 @@ def create_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -138,8 +138,8 @@ def get_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo_doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -150,8 +150,8 @@ def update_document( ) -> glm.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/demo_doc", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -163,14 +163,14 @@ def list_documents( self.observed_requests.append(request) return [ glm.Document( - name="corpora/demo_corpus/documents/demo_doc_1", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/demo_doc_1", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Document( - name="corpora/demo_corpus/documents/demo_doc_2", - display_name="demo_doc_2", + name="corpora/demo-corpus/documents/demo_doc_2", + display_name="demo-doc-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), @@ -192,7 +192,7 @@ def query_document( glm.RelevantChunk( 
chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -209,7 +209,7 @@ def create_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -223,13 +223,13 @@ def batch_create_chunks( return glm.BatchCreateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc", + name="corpora/demo-corpus/documents/demo-doc/chunks/dc", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc1", + name="corpora/demo-corpus/documents/demo-doc/chunks/dc1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -243,7 +243,7 @@ def get_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -256,13 +256,13 @@ def list_chunks( self.observed_requests.append(request) return [ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), 
glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -273,7 +273,7 @@ def list_chunks( def update_chunk(request: glm.UpdateChunkRequest) -> glm.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -287,13 +287,13 @@ def batch_update_chunks( return glm.BatchUpdateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -313,45 +313,32 @@ def batch_delete_chunks( ) -> None: self.observed_requests.append(request) - def test_create_corpus(self, display_name="demo_corpus"): - x = retriever.create_corpus(display_name=display_name) + def test_create_corpus(self, name="demo-corpus"): + x = retriever.create_corpus(name=name) self.assertIsInstance(x, retriever_service.Corpus) - self.assertEqual("demo_corpus", x.display_name) + self.assertEqual("demo-corpus", x.display_name) self.assertEqual("corpora/demo_corpus", x.name) - @parameterized.named_parameters( - [ - dict(testcase_name="match_corpora_regex", 
name="corpora/demo_corpus"), - dict(testcase_name="no_corpora", name="demo_corpus"), - dict(testcase_name="with_punctuation", name="corpora/demo_corpus*(*)"), - dict(testcase_name="dash_at_start", name="-demo_corpus"), - ] - ) - def test_create_corpus_names(self, name): + def test_get_corpus(self, name="demo-corpus"): x = retriever.create_corpus(name=name) - self.assertEqual("demo_corpus", x.display_name) - self.assertEqual("corpora/demo_corpus", x.name) - - def test_get_corpus(self, display_name="demo_corpus"): - x = retriever.create_corpus(display_name=display_name) c = retriever.get_corpus(name=x.name) - self.assertEqual("demo_corpus", c.display_name) + self.assertEqual("demo-corpus", c.display_name) def test_update_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - update_request = demo_corpus.update(updates={"display_name": "demo_corpus_1"}) + demo_corpus = retriever.create_corpus(name="demo-corpus") + update_request = demo_corpus.update(updates={"display_name": "demo-corpus_1"}) self.assertIsInstance(self.observed_requests[-1], glm.UpdateCorpusRequest) - self.assertEqual("demo_corpus_1", demo_corpus.display_name) + self.assertEqual("demo-corpus_1", demo_corpus.display_name) def test_list_corpora(self): x = list(retriever.list_corpora(page_size=1)) self.assertEqual(len(x), 2) def test_query_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") demo_chunk = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = demo_corpus.query(query="What kind of chunk is this?") @@ -361,7 +348,7 @@ def test_query_corpus(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - 
name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -373,65 +360,47 @@ def test_query_corpus(self): ) def test_delete_corpus(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") delete_request = retriever.delete_corpus(name="corpora/demo_corpus", force=True) self.assertIsInstance(self.observed_requests[-1], glm.DeleteCorpusRequest) - def test_create_document(self, display_name="demo_doc"): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - x = demo_corpus.create_document(display_name=display_name) + def test_create_document(self, display_name="demo-doc"): + demo_corpus = retriever.create_corpus(name="demo-corpus") + x = demo_corpus.create_document(name=display_name) self.assertIsInstance(x, retriever_service.Document) - self.assertEqual("demo_doc", x.display_name) + self.assertEqual("demo-doc", x.display_name) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_document_regex", name="corpora/demo_corpus/documents/demo_doc" - ), - dict(testcase_name="no_document", name="corpora/demo_corpus/demo_document"), - dict( - testcase_name="with_punctuation", name="corpora/demo_corpus*(*)/documents/demo_doc" - ), - dict(testcase_name="dash_at_start", name="-demo_doc"), - ] - ) - def test_create_document_name(self, name): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - x = demo_corpus.create_document(name=name) - self.assertEqual("corpora/demo_corpus/documents/demo_doc", x.name) - self.assertEqual("demo_doc", x.display_name) - - def test_get_document(self, display_name="demo_doc"): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - x = 
demo_corpus.create_document(display_name=display_name) + def test_get_document(self, display_name="demo-doc"): + demo_corpus = retriever.create_corpus(name="demo-corpus") + x = demo_corpus.create_document(name=display_name) d = demo_corpus.get_document(name=x.name) - self.assertEqual("demo_doc", d.display_name) + self.assertEqual("demo-doc", d.display_name) def test_update_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - update_request = demo_document.update(updates={"display_name": "demo_doc_1"}) - self.assertEqual("demo_doc_1", demo_document.display_name) + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + update_request = demo_document.update(updates={"display_name": "demo-doc-1"}) + self.assertEqual("demo-doc-1", demo_document.display_name) def test_delete_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - demo_doc2 = demo_corpus.create_document(display_name="demo_doc_2") - delete_request = demo_corpus.delete_document(name="corpora/demo_corpus/documents/demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + demo_doc2 = demo_corpus.create_document(name="demo-doc-2") + delete_request = demo_corpus.delete_document(name="corpora/demo-corpus/documents/demo_doc") self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) def test_list_documents(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") - demo_doc2 = demo_corpus.create_document(display_name="demo_doc_2") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") + demo_doc2 
= demo_corpus.create_document(name="demo-doc-2") self.assertLen(list(demo_corpus.list_documents()), 2) def test_query_document(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") demo_chunk = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = demo_document.query(query="What kind of chunk is this?") @@ -441,7 +410,7 @@ def test_query_document(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -453,39 +422,25 @@ def test_query_document(self): ) def test_create_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_chunk_regex", - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", - ), - dict(testcase_name="no_chunk", name="corpora/demo_corpus/demo_document/demo_chunk"), - dict( - testcase_name="with_punctuation", - 
name="corpora/demo_corpus*(*)/documents/demo_doc/chunks*****/demo_chunk", - ), - dict(testcase_name="dash_at_start", name="-demo_chunk"), - dict(testcase_name="empty_value", name=""), - ] - ) - def test_create_chunk_name(self, name): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + def test_create_chunk_empty(self): + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name=name, data="This is a demo chunk.", ) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertIsInstance(x, retriever_service.Chunk) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) + self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) @parameterized.named_parameters( [ @@ -493,11 +448,11 @@ def test_create_chunk_name(self, name): testcase_name="dictionaries", chunks=[ { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "name": "corpora/demo-corpus/documents/demo-doc/chunks/dc", "data": "This is a demo chunk.", }, { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "name": "corpora/demo-corpus/documents/demo-doc/chunks/dc1", "data": "This is another demo chunk.", }, ], @@ -506,11 +461,11 @@ def test_create_chunk_name(self, name): testcase_name="tuples", chunks=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "corpora/demo-corpus/documents/demo-doc/chunks/dc", "This is a demo chunk.", ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "corpora/demo-corpus/documents/demo-doc/chunks/dc1", "This is another demo chunk.", ), ], @@ -518,32 +473,32 @@ def test_create_chunk_name(self, name): ] ) def test_batch_create_chunks(self, chunks): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = 
demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") chunks = demo_document.batch_create_chunks(chunks=chunks) self.assertIsInstance(self.observed_requests[-1], glm.BatchCreateChunksRequest) self.assertEqual("This is a demo chunk.", chunks[0].data.string_value) self.assertEqual("This is another demo chunk.", chunks[1].data.string_value) def test_get_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) ch = demo_document.get_chunk(name=x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), ch.data) def test_list_chunks(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) @@ -552,10 +507,10 @@ def test_list_chunks(self): self.assertLen(list_req, 2) def test_update_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x 
= demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) x.update(updates={"data": {"string_value": "This is an updated demo chunk."}}) @@ -569,10 +524,10 @@ def test_update_chunk(self): dict( testcase_name="dictionary_of_updates", updates={ - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk": { + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -581,11 +536,11 @@ def test_update_chunk(self): testcase_name="list_of_tuples", updates=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk", {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + "corpora/demo-corpus/documents/demo-doc/chunks/demo-chunk-1", {"data": {"string_value": "This is another updated chunk."}}, ), ], @@ -593,14 +548,14 @@ def test_update_chunk(self): ], ) def test_batch_update_chunks_data_structures(self, updates): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) update_request = demo_document.batch_update_chunks(chunks=updates) @@ -613,26 +568,24 @@ def 
test_batch_update_chunks_data_structures(self, updates): ) def test_delete_chunk(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) - delete_request = demo_document.delete_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk" - ) + delete_request = demo_document.delete_chunk(name="demo-chunk") self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) def test_batch_delete_chunks(self): - demo_corpus = retriever.create_corpus(display_name="demo_corpus") - demo_document = demo_corpus.create_document(display_name="demo_doc") + demo_corpus = retriever.create_corpus(name="demo-corpus") + demo_document = demo_corpus.create_document(name="demo-doc") x = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = demo_document.create_chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is another demo chunk.", ) delete_request = demo_document.batch_delete_chunks(chunks=[x.name, y.name]) diff --git a/tests/test_retriever_async.py b/tests/test_retriever_async.py index 2ecccb440..6512e268f 100644 --- a/tests/test_retriever_async.py +++ b/tests/test_retriever_async.py @@ -47,8 +47,8 @@ async def create_corpus( ) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus", + name="corpora/demo-corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -59,8 +59,8 @@ async def get_corpus( ) -> 
glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus", + name="corpora/demo-corpus", + display_name="demo-corpus", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -69,8 +69,8 @@ async def get_corpus( async def update_corpus(request: glm.UpdateCorpusRequest) -> glm.Corpus: self.observed_requests.append(request) return glm.Corpus( - name="corpora/demo_corpus", - display_name="demo_corpus_1", + name="corpora/demo-corpus", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -81,14 +81,14 @@ async def list_corpora(request: glm.ListCorporaRequest) -> glm.ListCorporaRespon async def results(): yield glm.Corpus( - name="corpora/demo_corpus_1", - display_name="demo_corpus_1", + name="corpora/demo-corpus-1", + display_name="demo-corpus-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Corpus( - name="corpora/demo_corpus_2", - display_name="demo_corpus_2", + name="corpora/demo-corpus_2", + display_name="demo-corpus-2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -105,7 +105,7 @@ async def query_corpus( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -126,8 +126,8 @@ async def create_document( ) -> retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -138,8 +138,8 @@ async def get_document( ) -> 
retriever_service.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -150,8 +150,8 @@ async def update_document( ) -> glm.Document: self.observed_requests.append(request) return glm.Document( - name="corpora/demo_corpus/documents/demo_doc", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/demo-doc", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -164,14 +164,14 @@ async def list_documents( async def results(): yield glm.Document( - name="corpora/demo_corpus/documents/demo_doc_1", - display_name="demo_doc_1", + name="corpora/demo-corpus/documents/dem-doc_1", + display_name="demo-doc-1", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Document( - name="corpora/demo_corpus/documents/demo_doc_2", - display_name="demo_doc_2", + name="corpora/demo-corpus/documents/dem-doc_2", + display_name="demo-doc_2", create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) @@ -194,7 +194,7 @@ async def query_document( glm.RelevantChunk( chunk_relevance_score=0.08, chunk=glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, custom_metadata=[], state=0, @@ -211,7 +211,7 @@ async def create_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", 
update_time="2000-01-01T01:01:01.123456Z", @@ -225,13 +225,13 @@ async def batch_create_chunks( return glm.BatchCreateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc", + name="corpora/demo-corpus/documents/dem-doc/chunks/dc", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/dc1", + name="corpora/demo-corpus/documents/dem-doc/chunks/dc1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -245,7 +245,7 @@ async def get_chunk( ) -> retriever_service.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -259,13 +259,13 @@ async def list_chunks( async def results(): yield glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is a demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ) yield glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data={"string_value": "This is another demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -277,7 +277,7 @@ async def results(): async def update_chunk(request: glm.UpdateChunkRequest) -> glm.Chunk: self.observed_requests.append(request) return glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + 
name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated demo chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -291,13 +291,13 @@ async def batch_update_chunks( return glm.BatchUpdateChunksResponse( chunks=[ glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data={"string_value": "This is an updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", ), glm.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", data={"string_value": "This is another updated chunk."}, create_time="2000-01-01T01:01:01.123456Z", update_time="2000-01-01T01:01:01.123456Z", @@ -317,32 +317,19 @@ async def batch_delete_chunks( ) -> None: self.observed_requests.append(request) - async def test_create_corpus(self, display_name="demo_corpus"): - x = await retriever.create_corpus_async(display_name=display_name) + async def test_create_corpus(self, name="demo-corpus"): + x = await retriever.create_corpus_async(name=name) self.assertIsInstance(x, retriever_service.Corpus) - self.assertEqual("demo_corpus", x.display_name) - self.assertEqual("corpora/demo_corpus", x.name) + self.assertEqual("demo-corpus", x.display_name) + self.assertEqual("corpora/demo-corpus", x.name) - @parameterized.named_parameters( - [ - dict(testcase_name="match_corpora_regex", name="corpora/demo_corpus"), - dict(testcase_name="no_corpora", name="demo_corpus"), - dict(testcase_name="with_punctuation", name="corpora/demo_corpus*(*)"), - dict(testcase_name="dash_at_start", name="-demo_corpus"), - ] - ) - async def test_create_corpus_names(self, name): + async def test_get_corpus(self, name="demo-corpus"): x = await retriever.create_corpus_async(name=name) - self.assertEqual("demo_corpus", x.display_name) 
- self.assertEqual("corpora/demo_corpus", x.name) - - async def test_get_corpus(self, display_name="demo_corpus"): - x = await retriever.create_corpus_async(display_name=display_name) c = await retriever.get_corpus_async(name=x.name) - self.assertEqual("demo_corpus", c.display_name) + self.assertEqual("demo-corpus", c.display_name) async def test_update_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") update_request = await demo_corpus.update_async(updates={"display_name": "demo_corpus_1"}) self.assertEqual("demo_corpus_1", demo_corpus.display_name) @@ -354,10 +341,10 @@ async def test_list_corpora(self): self.assertEqual(len(result), 2) async def test_query_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") demo_chunk = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = await demo_corpus.query_async(query="What kind of chunk is this?") @@ -367,7 +354,7 @@ async def test_query_corpus(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -379,67 +366,49 @@ async def test_query_corpus(self): ) async def test_delete_corpus(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - delete_request = await 
retriever.delete_corpus_async(name="corpora/demo_corpus", force=True) + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + delete_request = await retriever.delete_corpus_async(name="corpora/demo-corpus", force=True) self.assertIsInstance(self.observed_requests[-1], glm.DeleteCorpusRequest) - async def test_create_document(self, display_name="demo_doc"): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(display_name=display_name) + async def test_create_document(self, display_name="demo-doc"): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + x = await demo_corpus.create_document_async(name=display_name) self.assertIsInstance(x, retriever_service.Document) - self.assertEqual("demo_doc", x.display_name) + self.assertEqual("demo-doc", x.display_name) - @parameterized.named_parameters( - [ - dict( - testcase_name="match_document_regex", name="corpora/demo_corpus/documents/demo_doc" - ), - dict(testcase_name="no_document", name="corpora/demo_corpus/demo_document"), - dict( - testcase_name="with_punctuation", name="corpora/demo_corpus*(*)/documents/demo_doc" - ), - dict(testcase_name="dash_at_start", name="-demo_doc"), - ] - ) - async def test_create_document_name(self, name): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(name=name) - self.assertEqual("corpora/demo_corpus/documents/demo_doc", x.name) - self.assertEqual("demo_doc", x.display_name) - - async def test_get_document(self, display_name="demo_doc"): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - x = await demo_corpus.create_document_async(display_name=display_name) + async def test_get_document(self, display_name="demo-doc"): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + x = 
await demo_corpus.create_document_async(name=display_name) d = await demo_corpus.get_document_async(name=x.name) - self.assertEqual("demo_doc", d.display_name) + self.assertEqual("demo-doc", d.display_name) async def test_update_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - update_request = await demo_document.update_async(updates={"display_name": "demo_doc_1"}) - self.assertEqual("demo_doc_1", demo_document.display_name) + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + update_request = await demo_document.update_async(updates={"display_name": "demo-doc-1"}) + self.assertEqual("demo-doc-1", demo_document.display_name) async def test_delete_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - demo_doc2 = await demo_corpus.create_document_async(display_name="demo_doc_2") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") + demo_doc2 = await demo_corpus.create_document_async(name="demo-doc-2") delete_request = await demo_corpus.delete_document_async( - name="corpora/demo_corpus/documents/demo_doc" + name="corpora/demo-corpus/documents/demo-doc" ) self.assertIsInstance(self.observed_requests[-1], glm.DeleteDocumentRequest) async def test_list_documents(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") - demo_doc2 = await demo_corpus.create_document_async(display_name="demo_doc_2") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await 
demo_corpus.create_document_async(name="demo-doc") + demo_doc2 = await demo_corpus.create_document_async(name="demo-doc-2") self.assertLen(list(demo_corpus.list_documents()), 2) async def test_query_document(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") demo_chunk = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) q = await demo_document.query_async(query="What kind of chunk is this?") @@ -449,7 +418,7 @@ async def test_query_document(self): retriever_service.RelevantChunk( chunk_relevance_score=0.08, chunk=retriever_service.Chunk( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", data="This is a demo chunk.", custom_metadata=[], state=0, @@ -461,38 +430,25 @@ async def test_query_document(self): ) async def test_create_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) self.assertIsInstance(x, retriever_service.Chunk) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) - 
@parameterized.named_parameters( - [ - dict( - testcase_name="match_chunk_regex", - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", - ), - dict(testcase_name="no_chunk", name="corpora/demo_corpus/demo_document/demo_chunk"), - dict( - testcase_name="with_punctuation", - name="corpora/demo_corpus*(*)/documents/demo_doc/chunks*****/demo_chunk", - ), - dict(testcase_name="dash_at_start", name="-demo_chunk"), - ] - ) - async def test_create_chunk_name(self, name): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + async def test_create_chunk_empty(self): + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name=name, data="This is a demo chunk.", ) - self.assertEqual("corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", x.name) + self.assertIsInstance(x, retriever_service.Chunk) + self.assertEqual("corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", x.name) + self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), x.data) @parameterized.named_parameters( [ @@ -500,11 +456,11 @@ async def test_create_chunk_name(self, name): testcase_name="dictionaries", chunks=[ { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "name": "corpora/demo-corpus/documents/dem-doc/chunks/dc", "data": "This is a demo chunk.", }, { - "name": "corpora/demo_corpus/documents/demo_doc/chunks/dc1", + "name": "corpora/demo-corpus/documents/dem-doc/chunks/dc1", "data": "This is another demo chunk.", }, ], @@ -513,11 +469,11 @@ async def test_create_chunk_name(self, name): testcase_name="tuples", chunks=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc", + "corpora/demo-corpus/documents/dem-doc/chunks/dc", "This is a demo chunk.", ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/dc1", 
+ "corpora/demo-corpus/documents/dem-doc/chunks/dc1", "This is another demo chunk.", ), ], @@ -525,32 +481,32 @@ async def test_create_chunk_name(self, name): ] ) async def test_batch_create_chunks(self, chunks): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") chunks = await demo_document.batch_create_chunks_async(chunks=chunks) self.assertIsInstance(self.observed_requests[-1], glm.BatchCreateChunksRequest) self.assertEqual("This is a demo chunk.", chunks[0].data.string_value) self.assertEqual("This is another demo chunk.", chunks[1].data.string_value) async def test_get_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) ch = await demo_document.get_chunk_async(name=x.name) self.assertEqual(retriever_service.ChunkData("This is a demo chunk."), ch.data) async def test_list_chunks(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await 
demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) chunks = [] @@ -560,10 +516,10 @@ async def test_list_chunks(self): self.assertLen(chunks, 2) async def test_update_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) await x.update_async(updates={"data": {"string_value": "This is an updated demo chunk."}}) @@ -577,10 +533,10 @@ async def test_update_chunk(self): dict( testcase_name="dictionary_of_updates", updates={ - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk": { + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk": { "data": {"string_value": "This is an updated chunk."} }, - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1": { + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1": { "data": {"string_value": "This is another updated chunk."} }, }, @@ -589,11 +545,11 @@ async def test_update_chunk(self): testcase_name="list_of_tuples", updates=[ ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk", {"data": {"string_value": "This is an updated chunk."}}, ), ( - "corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + "corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk-1", {"data": {"string_value": "This is another updated chunk."}}, ), ], @@ -601,14 +557,14 @@ async def test_update_chunk(self): ], ) async def test_batch_update_chunks_data_structures(self, updates): - demo_corpus = await 
retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk_1", + name="demo-chunk-1", data="This is another demo chunk.", ) update_request = await demo_document.batch_update_chunks_async(chunks=updates) @@ -621,26 +577,26 @@ async def test_batch_update_chunks_data_structures(self, updates): ) async def test_delete_chunk(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) delete_request = await demo_document.delete_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk" + name="corpora/demo-corpus/documents/dem-doc/chunks/demo-chunk" ) self.assertIsInstance(self.observed_requests[-1], glm.DeleteChunkRequest) async def test_batch_delete_chunks(self): - demo_corpus = await retriever.create_corpus_async(display_name="demo_corpus") - demo_document = await demo_corpus.create_document_async(display_name="demo_doc") + demo_corpus = await retriever.create_corpus_async(name="demo-corpus") + demo_document = await demo_corpus.create_document_async(name="demo-doc") x = await demo_document.create_chunk_async( - 
name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is a demo chunk.", ) y = await demo_document.create_chunk_async( - name="corpora/demo_corpus/documents/demo_doc/chunks/demo_chunk", + name="demo-chunk", data="This is another demo chunk.", ) delete_request = await demo_document.batch_delete_chunks_async(chunks=[x.name, y.name])