Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions vechord/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import httpx
import msgspec

from vechord.utils import GEMINI_GENERATE_RPS, RateLimitTransport


class BaseAugmenter(ABC):
@abstractmethod
Expand Down Expand Up @@ -49,6 +51,7 @@ def __init__(self, model: str = "gemini-2.5-flash"):
self.client = httpx.AsyncClient(
headers={"Content-Type": "application/json"},
timeout=httpx.Timeout(120.0, connect=5.0),
transport=RateLimitTransport(max_per_second=GEMINI_GENERATE_RPS),
)

async def __aenter__(self):
Expand Down
3 changes: 3 additions & 0 deletions vechord/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import httpx
import msgspec

from vechord.utils import GEMINI_GENERATE_RPS, RateLimitTransport


class BaseChunker(ABC):
@abstractmethod
Expand Down Expand Up @@ -117,6 +119,7 @@ def __init__(self, model: str = "gemini-2.5-flash", size: int = 1536):
self.client = httpx.AsyncClient(
headers={"Content-Type": "application/json"},
timeout=httpx.Timeout(120.0, connect=5.0),
transport=RateLimitTransport(max_per_second=GEMINI_GENERATE_RPS),
)
self.prompt = f"""
You are an expert text chunker, skilled at dividing documents into meaningful
Expand Down
2 changes: 2 additions & 0 deletions vechord/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from vechord.log import logger
from vechord.model import SparseEmbedding
from vechord.utils import GEMINI_EMBEDDING_RPS, RateLimitTransport


class VecType(Enum):
Expand Down Expand Up @@ -91,6 +92,7 @@ def __init__(self, model: str = "gemini-embedding-exp-03-07", dim: int = 3072):
params={"key": self.api_key},
headers={"Content-Type": "application/json"},
timeout=httpx.Timeout(30.0, connect=10.0),
transport=RateLimitTransport(max_per_second=GEMINI_EMBEDDING_RPS),
)

async def __aenter__(self):
Expand Down
2 changes: 2 additions & 0 deletions vechord/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import msgspec

from vechord.model import Entity, Relation
from vechord.utils import GEMINI_GENERATE_RPS, RateLimitTransport


class BaseEntityRecognizer(ABC):
Expand Down Expand Up @@ -142,6 +143,7 @@ def __init__(self, model: str = "gemini-2.5-flash"):
self.client = httpx.AsyncClient(
headers={"Content-Type": "application/json"},
timeout=httpx.Timeout(30.0, connect=5.0),
transport=RateLimitTransport(max_per_second=GEMINI_GENERATE_RPS),
)

async def __aenter__(self):
Expand Down
2 changes: 2 additions & 0 deletions vechord/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytrec_eval

from vechord.model import RetrievedChunk
from vechord.utils import GEMINI_GENERATE_RPS, RateLimitTransport


class BaseEvaluator(ABC):
Expand Down Expand Up @@ -79,6 +80,7 @@ def __init__(self, model: str = "gemini-2.5-flash"):
self.client = httpx.AsyncClient(
headers={"Content-Type": "application/json"},
timeout=httpx.Timeout(120.0, connect=5.0),
transport=RateLimitTransport(max_per_second=GEMINI_GENERATE_RPS),
)
self.prompt = """
Given the following chunk of text and the overall document it belongs to, generate
Expand Down
2 changes: 2 additions & 0 deletions vechord/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from vechord.log import logger
from vechord.model import Document
from vechord.utils import GEMINI_GENERATE_RPS, RateLimitTransport


class BaseHTMLParser(HTMLParser):
Expand Down Expand Up @@ -109,6 +110,7 @@ def __init__(self, model: str = "gemini-2.5-flash"):
self.client = httpx.AsyncClient(
timeout=httpx.Timeout(10.0, read=120.0),
headers={"Content-Type": "application/json"},
transport=RateLimitTransport(max_per_second=GEMINI_GENERATE_RPS),
)

def name(self) -> str:
Expand Down
55 changes: 55 additions & 0 deletions vechord/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import asyncio
import sys
from typing import Any

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self


import httpx

# Requests-per-second ceilings used to pace calls to the Gemini API.
# https://ai.google.dev/gemini-api/docs/rate-limits#tier-1
# 16.66 RPS ≈ 1000 requests/minute (generate-content endpoints).
GEMINI_GENERATE_RPS = 16.66
# NOTE(review): 0.6 RPS is presumably derived from the embedding-model
# quota on the same tier — confirm against the linked rate-limit docs.
GEMINI_EMBEDDING_RPS = 0.6


class RateLimitTransport(httpx.AsyncHTTPTransport):
    """httpx async transport that throttles outgoing requests to a fixed rate.

    Request starts are spaced at least ``1 / max_per_second`` seconds apart
    using the event-loop clock, so all concurrent tasks sharing one transport
    instance are collectively rate limited.
    """

    def __init__(self, max_per_second: float = 5, **kwargs) -> None:
        """
        Async HTTP transport with rate limit.

        Args:
            max_per_second: Maximum number of requests per second.
                Must be positive.

        Other args are passed to httpx.AsyncHTTPTransport.

        Raises:
            ValueError: if ``max_per_second`` is not positive — a zero value
                would raise ``ZeroDivisionError`` below, and a negative one
                would silently disable throttling.
        """
        if max_per_second <= 0:
            raise ValueError("max_per_second must be positive")
        # Minimum spacing between two consecutive request starts, in seconds.
        self.interval: float = 1 / max_per_second
        # Earliest event-loop time at which the next request may start.
        self.next_start_time: float = 0.0
        super().__init__(**kwargs)

    async def notify_task_start(self) -> None:
        """Block until this task may start, then claim the next time slot.

        Adapted from aiometer's rate meter:
        https://github.com/florimondmanca/aiometer/blob/358976e0b60bce29b9fe8c59807fafbad3e62cbc/src/aiometer/_impl/meters.py#L57

        The shared ``next_start_time`` is re-read after every sleep so that
        concurrent waiters do not all wake and start at once.  The final
        read-modify-write of ``next_start_time`` contains no ``await``, so it
        is atomic with respect to other asyncio tasks on the same loop.
        """
        loop = asyncio.get_running_loop()
        while True:
            now = loop.time()
            next_start_time = max(self.next_start_time, now)
            until_now = next_start_time - now
            if until_now <= self.interval:
                break
            await asyncio.sleep(max(0, until_now - self.interval))
        self.next_start_time = max(self.next_start_time, now) + self.interval

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        """Wait for a rate-limit slot, then forward the request."""
        await self.notify_task_start()
        return await super().handle_async_request(request)

    async def __aenter__(self) -> Self:
        # NOTE(review): entering the context consumes one rate-limit slot
        # before any request has been sent — confirm this is intentional
        # (it delays and budgets client startup, e.g. ~1.7s at 0.6 RPS).
        await self.notify_task_start()
        return await super().__aenter__()

    async def __aexit__(self, *args: Any) -> None:
        await super().__aexit__(*args)