Binary file added 16-Evaluations/data/Newwhitepaper_Agents2.pdf
Binary file not shown.
71 changes: 71 additions & 0 deletions 16-Evaluations/myrag.py
@@ -0,0 +1,71 @@
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough


class PDFRAG:
    def __init__(self, file_path: str, llm):
        self.file_path = file_path
        self.llm = llm

    def load_documents(self):
        # Load documents from the PDF file
        loader = PyMuPDFLoader(self.file_path)
        docs = loader.load()
        return docs

    def split_documents(self, docs):
        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
        split_documents = text_splitter.split_documents(docs)
        return split_documents

    def create_vectorstore(self, split_documents):
        # Create embeddings
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

        # Create the DB (FAISS vector store)
        vectorstore = FAISS.from_documents(
            documents=split_documents, embedding=embeddings
        )
        return vectorstore

    def create_retriever(self):
        vectorstore = self.create_vectorstore(
            self.split_documents(self.load_documents())
        )
        # Create the retriever
        retriever = vectorstore.as_retriever()
        return retriever

    def create_chain(self, retriever):
        # Create the prompt
        prompt = PromptTemplate.from_template(
            """You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know.

        #Context:
        {context}

        #Question:
        {question}

        #Answer:"""
        )

        # Create the chain
        chain = (
            {
                "context": retriever,
                "question": RunnablePassthrough(),
            }
            | prompt
            | self.llm
            | StrOutputParser()
        )
        return chain
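
A minimal usage sketch of the PDFRAG class added in this diff (not part of the commit): it assumes an OpenAI API key is configured and passes ChatOpenAI from langchain_openai as the llm argument, which is an assumption; any LangChain-compatible chat model should work, and the import path for PDFRAG depends on how the 16-Evaluations directory is laid out on your PYTHONPATH.

from langchain_openai import ChatOpenAI

from myrag import PDFRAG  # assumed import path; adjust to your project layout

# Hypothetical example: build the RAG pipeline over the PDF added in this PR.
rag = PDFRAG(
    "16-Evaluations/data/Newwhitepaper_Agents2.pdf",
    ChatOpenAI(model="gpt-4o-mini", temperature=0),  # assumed model choice
)
retriever = rag.create_retriever()
chain = rag.create_chain(retriever)

# The retriever fills {context}; RunnablePassthrough forwards the raw question to {question}.
print(chain.invoke("What is an AI agent?"))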