Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ publish: build
@uv publish

test:
@uv sync --extra spacy --inexact
@uv run -- pytest -v tests

sync:
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The related Docker images can be found in [VectorChord Suite][vectorchord-suite]
- [x] `Extractor` to extract the content from PDF, HTML, etc.
- [x] `EntityRecognizer` to extract the entities and relations from the text
- [x] `Reranker` for hybrid search
- [x] `GroundTruth` to generate the ground truth for evaluation

## Examples

Expand Down
20 changes: 18 additions & 2 deletions docs/source/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@
:show-inheritance:
```

## Extract

```{eval-rst}
.. automodule:: vechord.extract
:members:
:show-inheritance:
```

## Evaluate

```{eval-rst}
Expand All @@ -53,10 +61,18 @@
:show-inheritance:
```

## Extract
## GroundTruth

```{eval-rst}
.. automodule:: vechord.extract
.. automodule:: vechord.groundtruth
:members: GroundTruth
:show-inheritance:
```

## Graph

```{eval-rst}
.. automodule:: vechord.graph
:members:
:show-inheritance:
```
Expand Down
10 changes: 9 additions & 1 deletion docs/source/utils.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,18 @@ pip install vechord[openai,spacy,cohere]
- {py:class}`~vechord.embedding.VoyageDenseEmbedding`: VoyageAI embedding
- {py:class}`~vechord.embedding.SpacyDenseEmbedding`: Spacy embedding
- Evaluate
- {py:class}`~vechord.evaluate.GeminiEvaluator`: Gemini based evaluator
- {py:class}`~vechord.evaluate.GeminiEvaluator`: Gemini based query generator
- {py:class}`~vechord.evaluate.GeminiUMBRELAEvaluator`: Gemini UMBRELA evaluator
- Extract
- {py:class}`~vechord.extract.SimpleExtractor`: Simple extractor
- {py:class}`~vechord.extract.GeminiExtractor`: Gemini extractor
- {py:class}`~vechord.extract.LlamaParseExtractor`: LlamaParse extractor
- Graph
- {py:class}`~vechord.graph.SpacyEntityRecognizer`: Spacy based entity recognizer
- {py:class}`~vechord.graph.GeminiEntityRecognizer`: Gemini based entity recognizer
- GroundTruth
- {py:class}`~vechord.groundtruth.GroundTruth`: generate ground truth
- Rerank
- {py:class}`~vechord.rerank.CohereReranker`: Cohere reranker
- {py:class}`~vechord.rerank.JinaReranker`: Jina MultiModal reranker
- {py:class}`~vechord.rerank.ReciprocalRankFusion`: fuse function for hybrid retrieval
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def anyio_backend():
@pytest.fixture(name="registry")
async def fixture_registry(request):
    """Yield a `VechordRegistry` opened for the requesting test.

    The namespace is the name of the requesting test function. The tables
    passed to the registry come from the fixture parameter when the test
    parametrizes this fixture; otherwise an empty tuple is used.
    """
    namespace = request.node.obj.__name__
    # `request.param` only exists when the fixture is parametrized, so read it
    # defensively. The trailing `or ()` keeps an explicit `None` (or otherwise
    # empty) parameter meaning "no tables" instead of passing it through.
    tables = getattr(request, "param", None) or ()
    async with VechordRegistry(namespace, TEST_POSTGRES, tables=tables) as registry:
        yield registry

Expand Down
57 changes: 57 additions & 0 deletions tests/test_groundtruth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import uuid
from unittest.mock import AsyncMock

import pytest

from vechord.client import set_namespace
from vechord.groundtruth import GroundTruth
from vechord.registry import VechordRegistry
from vechord.spec import _DefaultChunk

pytestmark = pytest.mark.anyio


@pytest.fixture(name="ground_truth_cleanup")
async def fixture_ground_truth_cleanup(request, registry: VechordRegistry):
    """Drop the `test_query` table after the requesting test has run."""
    test_namespace = request.node.obj.__name__
    # There is no setup step: everything after the yield is teardown.
    yield
    async with set_namespace(test_namespace):
        await registry.client.drop("test_query")


async def test_ground_truth(registry: VechordRegistry, ground_truth_cleanup):
    """Generate ground truth with mocked components, then evaluate it.

    The mocked retriever returns a single chunk whose text is the query
    itself, and the mocked estimator scores 3.0 when the passage equals the
    query (0.0 otherwise), which is above the evaluator threshold of 2.0.
    """
    queries = [
        "What is the largest mammal?",
        "What is the longest river in the world?",
        "What is the smallest bird?",
    ]

    async def mock_retrieve(query: str):
        # The uid is derived from the query, so it is stable across calls.
        chunk = _DefaultChunk(
            uid=uuid.uuid5(uuid.NAMESPACE_DNS, query),
            doc_id=None,
            text=query,
            vec=None,
            keyword=None,
        )
        return [chunk]

    async def mock_estimate(query: str, passage: str, chunk_type=None):
        if query == passage:
            return 1.0 + 2.0
        return 0.0

    retriever = AsyncMock(side_effect=mock_retrieve)
    evaluator = AsyncMock()
    evaluator.estimate = mock_estimate
    evaluator.relevant_threshold = 2.0

    ground_truth = GroundTruth(name="test", vr=registry)
    await ground_truth.generate(queries, retriever, evaluator)

    # Generation is expected to call the retriever exactly once per query.
    assert retriever.call_count == len(queries)

    # evaluate
    metric = await ground_truth.evaluate(retrieve=retriever)
    assert metric.ndcg == 1.0, metric
42 changes: 42 additions & 0 deletions tests/test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import inspect

import pytest

from vechord.client import set_namespace
from vechord.model import ResourceRequest, RunIngestAck, RunRequest
from vechord.pipeline import DynamicPipeline
from vechord.registry import VechordRegistry
from vechord.spec import DefaultDocument

pytestmark = pytest.mark.anyio


@pytest.fixture(name="run_pipeline_cleanup")
async def fixture_run_pipeline_cleanup(request, registry: VechordRegistry):
    """Drop the `defaultdocument` and `chunk` tables after the test has run."""
    test_namespace = request.node.obj.__name__
    # There is no setup step: everything after the yield is teardown.
    yield
    leftover_tables = ("defaultdocument", "chunk")
    async with set_namespace(test_namespace):
        for leftover in leftover_tables:
            await registry.client.drop(leftover)


async def test_run_pipeline(registry: VechordRegistry, run_pipeline_cleanup):
    """Run a three-step dynamic pipeline and check one document was stored."""
    embedding_step = ResourceRequest(kind="text-emb", provider="spacy", args={})
    chunk_step = ResourceRequest(
        kind="chunk", provider="regex", args={"size": 128, "overlap": 0}
    )
    index_step = ResourceRequest(
        kind="index", provider="vectorchord", args={"vector": {}}
    )
    steps = [embedding_step, chunk_step, index_step]

    # Use this test function's own name as the run name.
    namespace = inspect.currentframe().f_code.co_name
    pipe = DynamicPipeline.from_steps(steps=steps)
    run_request = RunRequest(
        name=namespace, data="what to insert".encode(), steps=steps
    )
    ack: RunIngestAck = await pipe.run(run_request, vr=registry)
    assert ack.name == namespace
    assert ack.uid

    stored_docs = await registry.select_by(DefaultDocument.partial_init())
    assert len(stored_docs) == 1
Loading