토니의 연습장
RAG 성능 테스트를 위한 함수 정의 본문
* PDFRAG 클래스는 하단의 코드를 참고하세요.
from myrag import PDFRAG
# 질문에 답변하는 함수를 생성
def ask_question_with_llm(
    llm,
    file_path="data/SPRI_AI_Brief_2023년12월호_F.pdf",
):
    """Build a question-answering callable backed by a PDF RAG pipeline.

    The PDFRAG object, its retriever and its chain are created once, when
    this factory is called; the returned closure reuses them on every call.

    Args:
        llm: Language model object handed to ``PDFRAG`` unchanged.
        file_path: Path of the PDF to index. Defaults to the document the
            original code hard-coded, so existing callers are unaffected.

    Returns:
        A function ``_ask_question(inputs)`` that takes a dict with a
        ``"question"`` key and returns a dict with the keys ``"question"``,
        ``"context"`` (page contents of the retrieved documents joined by
        newlines) and ``"answer"`` (the result of invoking the chain).
    """
    rag = PDFRAG(file_path, llm)

    # Build the retriever first; the chain is created from that same object.
    retriever = rag.create_retriever()
    rag_chain = rag.create_chain(retriever)

    def _ask_question(inputs: dict):
        """Answer ``inputs["question"]`` and report the retrieved context.

        Raises:
            KeyError: If ``inputs`` has no ``"question"`` key.
        """
        question = inputs["question"]

        # Retrieve the supporting documents and flatten them into one string
        # so the context can be returned alongside the answer.
        retrieved_docs = retriever.invoke(question)
        context = "\n".join(doc.page_content for doc in retrieved_docs)

        # NOTE(review): rag_chain was built from the same retriever, so it
        # presumably runs retrieval a second time internally - confirm
        # against PDFRAG.create_chain before relying on "context" being
        # exactly what the model saw.
        return {
            "question": question,
            "context": context,
            "answer": rag_chain.invoke(question),
        }

    return _ask_question
코드 상세 설명은 다음 링크를 참고: https://chatgpt.com/share/67c93c06-5fcc-8009-8d3d-6198ea081972
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
class PDFRAG:
    """Retrieval-augmented generation pipeline over a single PDF file.

    The pipeline steps are exposed as separate methods: load the PDF, split
    it into chunks, embed the chunks into a FAISS vector store, expose the
    store as a retriever, and assemble a prompt | llm | parser chain.
    """

    def __init__(
        self,
        file_path: str,
        llm,
        *,
        chunk_size: int = 300,
        chunk_overlap: int = 50,
        embedding_model: str = "text-embedding-3-small",
    ):
        """Store the pipeline configuration; no I/O happens here.

        Args:
            file_path: Path of the PDF to load.
            llm: Language model object placed in the chain after the prompt.
            chunk_size: ``chunk_size`` passed to the text splitter.
            chunk_overlap: ``chunk_overlap`` passed to the text splitter.
            embedding_model: Model name passed to ``OpenAIEmbeddings``.
        """
        self.file_path = file_path
        self.llm = llm
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.embedding_model = embedding_model

    @staticmethod
    def _format_docs(docs) -> str:
        """Join the ``page_content`` of each document with newlines."""
        return "\n".join(doc.page_content for doc in docs)

    def load_documents(self):
        """Load the PDF at ``self.file_path`` and return the loader's output."""
        loader = PyMuPDFLoader(self.file_path)
        return loader.load()

    def split_documents(self, docs):
        """Split ``docs`` into chunks using the configured size and overlap."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        return text_splitter.split_documents(docs)

    def create_vectorstore(self, split_documents):
        """Embed ``split_documents`` and return a FAISS vector store.

        NOTE(review): OpenAIEmbeddings presumably needs OpenAI credentials
        in the environment - confirm before running offline.
        """
        embeddings = OpenAIEmbeddings(model=self.embedding_model)
        return FAISS.from_documents(
            documents=split_documents,
            embedding=embeddings,
        )

    def create_retriever(self):
        """Run load -> split -> embed and return the store as a retriever.

        Every call repeats the whole pipeline; nothing is cached on the
        instance.
        """
        docs = self.load_documents()
        chunks = self.split_documents(docs)
        vectorstore = self.create_vectorstore(chunks)
        return vectorstore.as_retriever()

    def create_chain(self, retriever):
        """Assemble the question-answering chain around ``retriever``.

        The chain takes the question as its input, fills the prompt with the
        retrieved context and the question, calls ``self.llm`` and parses
        the output with ``StrOutputParser``.

        Args:
            retriever: Retriever whose results become the prompt context.

        Returns:
            The composed chain.
        """
        prompt = PromptTemplate.from_template(
            """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
#Context:
{context}
#Question:
{question}
#Answer:"""
        )

        # Pipe the retriever through _format_docs so the prompt receives the
        # documents' text rather than the repr of a list of Document objects.
        chain = (
            {
                "context": retriever | self._format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | self.llm
            | StrOutputParser()
        )
        return chain
'언어 AI (NLP) > LLM & RAG' 카테고리의 다른 글
Langchain - pipe 활용 (0) | 2025.03.24 |
---|---|
분산/병렬 처리 (DDP/MP) (0) | 2025.03.14 |
RAGAS 를 이용한 RAG 평가 (0) | 2025.02.27 |
RAG - AI Agent 예시 (0) | 2025.02.18 |
RAG 중복문장 제거 (0) | 2025.02.15 |