Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 134 additions & 88 deletions 09-VectorStore/utils/vectordbinterface.py
Original file line number Diff line number Diff line change
@@ -1,120 +1,166 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, List
from typing import Any, Dict, Optional, List, Iterable
from langchain_core.documents import Document


# ==========================================
# 1️⃣ 인덱스 관리 인터페이스
# ==========================================
class IndexManagerInterface(ABC):
class DocumentManager(ABC):
"""
์ธ๋ฑ์Šค ๊ด€๋ฆฌ ์ธํ„ฐํŽ˜์ด์Šค
๋ฌธ์„œ insert/update (upsert, upsert_parallel)
๋ฌธ์„œ search by query (search)
๋ฌธ์„œ delete by id, delete by filter (delete)
"""

@abstractmethod
def create_index(
self,
index_name: str,
dimension: int,
metric: str = "dotproduct",
pod_spec=None,
**kwargs
) -> Any:
"""์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์œผ๋กœ ์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ์ƒ์„ฑ์ด ์™„๋ฃŒ๋˜๋ฉด index_name์„ ๋ฐ˜ํ™˜ํ•˜๊ณ , ์—†๋‹ค๋ฉด None์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
def upsert(self, texts: Iterable[str], metadatas: Optional[list[dict]] = None, ids: Optional[List[str]] = None, **kwargs: Any
) -> None:
"""๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค. """
pass

@abstractmethod
def list_indexs(self) -> Any:
"""์ธ๋ฑ์Šค ๋ฆฌ์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค"""
def upsert_parallel(self, texts: Iterable[str], metadatas: Optional[list[dict]] = None, ids: Optional[List[str]] = None
, batch_size: int = 32, workers: int = 10, **kwargs: Any
) -> None:
"""๋ณ‘๋ ฌ๋กœ ๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค."""
pass

@abstractmethod
def get_index(self, index_name: str) -> Any:
"""์ธ๋ฑ์Šค๋ฅผ ์กฐํšŒํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์„ ๊ฐ€์ง„ ์ธ๋ฑ์Šค๊ฐ€ ์žˆ๋Š”์ง€ ์กฐํšŒํ•˜๊ณ , ์žˆ๋‹ค๋ฉด index_name์„ ๋ฐ˜ํ™˜ํ•˜๊ณ , ์—†๋‹ค๋ฉด None์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
pass
def search(self, query: str, k: int = 10, **kwargs: Any) -> List[Document]:
"""์ฟผ๋ฆฌ๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ  ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
๊ธฐ๋ณธ ๊ธฐ๋Šฅ: query (๋ฌธ์ž์—ด) -> ๋น„์Šทํ•œ ๋ฌธ์„œ k๊ฐœ ๋ฐ˜ํ™˜

@abstractmethod
def delete_index(self, index_name: str) -> None:
"""์ธ๋ฑ์Šค๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์„ ๊ฐ€์ง„ ์ธ๋ฑ์Šค๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค."""
cosine_similarity 써치하는 것 의미 **문제될 경우 이슈제기

-그외 기능 (추후 확장)
metadata search
이미지 서치할 때 벡터 받는 것
"""
pass

@abstractmethod
def delete(self, ids: Optional[list[str]] = None, filters: Optional[dict] = None, **kwargs: Any
) -> None:
"""ํ•„ํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ฌธ์„œ๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค.

# ==========================================
# 2️⃣ 문서 업서트 인터페이스
# ==========================================
class DocumentManagerInterface(ABC):
"""
๋ฌธ์„œ ๊ด€๋ฆฌ ์ธํ„ฐํŽ˜์ด์Šค (upsert, upsert_parallel)
"""
ids: List of ids to delete. If None, delete all. Default is None.
filters: Dictionary of filters (queries) to apply. If None, no filters apply.

@abstractmethod
def upsert_documents(
self, index_name: str, documents: List[Dict], **kwargs
) -> None:
"""๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค."""
"""
pass

@abstractmethod
def upsert_documents_parallel(
self,
index_name: str,
documents: List[Dict],
batch_size: int = 32,
max_workers: int = 10,
**kwargs
) -> None:
"""๋ณ‘๋ ฌ๋กœ ๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค."""
pass


# ==========================================
# 3️⃣ 문서 조회 및 삭제 인터페이스
# ==========================================
class QueryManagerInterface(ABC):
"""
๋ฌธ์„œ ๊ฒ€์ƒ‰ ๋ฐ ์‚ญ์ œ ์ธํ„ฐํŽ˜์ด์Šค (query, delete_by_filter)
"""
# deprecated
# # ==========================================
# # 1๏ธโƒฃ ์ธ๋ฑ์Šค ๊ด€๋ฆฌ ์ธํ„ฐํŽ˜์ด์Šค
# # ==========================================
# class IndexManagerInterface(ABC):
# """
# ์ธ๋ฑ์Šค ๊ด€๋ฆฌ ์ธํ„ฐํŽ˜์ด์Šค
# """

@abstractmethod
def query(
self, index_name: str, query_vector: List[float], top_k: int = 10, **kwargs
) -> List[Document]:
"""์ฟผ๋ฆฌ๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ  ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
pass
# @abstractmethod
# def create_index(
# self,
# index_name: str,
# dimension: int,
# metric: str = "dotproduct",
# pod_spec=None,
# **kwargs
# ) -> Any:
# """์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์œผ๋กœ ์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ์ƒ์„ฑ์ด ์™„๋ฃŒ๋˜๋ฉด index_name์„ ๋ฐ˜ํ™˜ํ•˜๊ณ , ์—†๋‹ค๋ฉด None์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
# pass

@abstractmethod
def delete_by_filter(self, index_name: str, filters: Dict, **kwargs) -> None:
"""ํ•„ํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ฌธ์„œ๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค."""
pass
# @abstractmethod
# def list_indexs(self) -> Any:
# """์ธ๋ฑ์Šค ๋ฆฌ์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค"""
# pass

# @abstractmethod
# def get_index(self, index_name: str) -> Any:
# """์ธ๋ฑ์Šค๋ฅผ ์กฐํšŒํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์„ ๊ฐ€์ง„ ์ธ๋ฑ์Šค๊ฐ€ ์žˆ๋Š”์ง€ ์กฐํšŒํ•˜๊ณ , ์žˆ๋‹ค๋ฉด index_name์„ ๋ฐ˜ํ™˜ํ•˜๊ณ , ์—†๋‹ค๋ฉด None์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
# pass

# ==========================================
# 4️⃣ 통합 인터페이스 (VectorDBInterface)
# ==========================================
class VectorDBInterface(
IndexManagerInterface, DocumentManagerInterface, QueryManagerInterface, ABC
):
"""
๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์˜ ํ†ตํ•ฉ ์ธํ„ฐํŽ˜์ด์Šค
- ์ธ๋ฑ์Šค ๊ด€๋ฆฌ
- ๋ฌธ์„œ ์—…์„œํŠธ
- ๋ฌธ์„œ ๊ฒ€์ƒ‰ ๋ฐ ์‚ญ์ œ
"""
# @abstractmethod
# def delete_index(self, index_name: str) -> None:
# """์ธ๋ฑ์Šค๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค. ์ฆ‰, index_name์„ ๊ฐ€์ง„ ์ธ๋ฑ์Šค๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค."""
# pass

@abstractmethod
def connect(self, **kwargs) -> None:
"""DB ์—ฐ๊ฒฐ์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค."""
pass

@abstractmethod
def preprocess_documents(self, documents: List[Document], **kwargs) -> List[Dict]:
"""LangChain Document ๊ฐ์ฒด๋ฅผ ํŠน์ • DB์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
pass
# # ==========================================
# # 2๏ธโƒฃ ๋ฌธ์„œ ์—…์„œํŠธ ์ธํ„ฐํŽ˜์ด์Šค
# # ==========================================
# class DocumentManagerInterface(ABC):
# """
# ๋ฌธ์„œ ๊ด€๋ฆฌ ์ธํ„ฐํŽ˜์ด์Šค (upsert, upsert_parallel)
# """

@abstractmethod
def get_api_key(self) -> str:
"""DB ์—ฐ๊ฒฐ์„ ์œ„ํ•œ API ํ‚ค ๋˜๋Š” ์ธ์ฆ ์ •๋ณด ๋ฐ˜ํ™˜"""
pass
# @abstractmethod
# def upsert_documents(
# self, index_name: str, documents: List[Dict], **kwargs
# ) -> None:
# """๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค."""
# pass

# @abstractmethod
# def upsert_documents_parallel(
# self,
# index_name: str,
# documents: List[Dict],
# batch_size: int = 32,
# max_workers: int = 10,
# **kwargs
# ) -> None:
# """๋ณ‘๋ ฌ๋กœ ๋ฌธ์„œ๋ฅผ ์—…์„œํŠธํ•ฉ๋‹ˆ๋‹ค."""
# pass


# # ==========================================
# # 3๏ธโƒฃ ๋ฌธ์„œ ์กฐํšŒ ๋ฐ ์‚ญ์ œ ์ธํ„ฐํŽ˜์ด์Šค
# # ==========================================
# class QueryManagerInterface(ABC):
# """
# ๋ฌธ์„œ ๊ฒ€์ƒ‰ ๋ฐ ์‚ญ์ œ ์ธํ„ฐํŽ˜์ด์Šค (query, delete_by_filter)
# """

# @abstractmethod
# def query(
# self, index_name: str, query_vector: List[float], top_k: int = 10, **kwargs
# ) -> List[Document]:
# """์ฟผ๋ฆฌ๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ  ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
# pass

# @abstractmethod
# def delete_by_filter(self, index_name: str, filters: Dict, **kwargs) -> None:
# """ํ•„ํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ฌธ์„œ๋ฅผ ์‚ญ์ œํ•ฉ๋‹ˆ๋‹ค."""
# pass


# # ==========================================
# # 4๏ธโƒฃ ํ†ตํ•ฉ ์ธํ„ฐํŽ˜์ด์Šค (VectorDBInterface)
# # ==========================================
# class VectorDBInterface(
# IndexManagerInterface, DocumentManagerInterface, QueryManagerInterface, ABC
# ):
# """
# ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์˜ ํ†ตํ•ฉ ์ธํ„ฐํŽ˜์ด์Šค
# - ์ธ๋ฑ์Šค ๊ด€๋ฆฌ
# - ๋ฌธ์„œ ์—…์„œํŠธ
# - ๋ฌธ์„œ ๊ฒ€์ƒ‰ ๋ฐ ์‚ญ์ œ
# """

# @abstractmethod
# def connect(self, **kwargs) -> None:
# """DB ์—ฐ๊ฒฐ์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค."""
# pass

# @abstractmethod
# def preprocess_documents(self, documents: List[Document], **kwargs) -> List[Dict]:
# """LangChain Document ๊ฐ์ฒด๋ฅผ ํŠน์ • DB์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
# pass

# @abstractmethod
# def get_api_key(self) -> str:
# """DB ์—ฐ๊ฒฐ์„ ์œ„ํ•œ API ํ‚ค ๋˜๋Š” ์ธ์ฆ ์ •๋ณด ๋ฐ˜ํ™˜"""
# pass


# ==========================================
Expand Down
Loading