토니의 연습장
RAG 성능 테스트를 위한 함수 정의 본문
※ PDFRAG 클래스는 하단의 코드를 참고하세요.
from myrag import PDFRAG
# 질문에 대한 답변하는 함수를 생성
def ask_question_with_llm(llm):
    """Build a question-answering callable backed by a PDF RAG pipeline.

    Args:
        llm: Language-model instance handed to ``PDFRAG``.

    Returns:
        A function that accepts ``{"question": str}`` and returns a dict
        containing the question, the retrieved context string, and the
        chain's answer.
    """
    # Construct the RAG pipeline once; the inner closure reuses it per call.
    rag = PDFRAG(
        "data/SPRI_AI_Brief_2023년12월호_F.pdf",
        llm,
    )
    retriever = rag.create_retriever()
    rag_chain = rag.create_chain(retriever)

    def _ask_question(inputs: dict):
        question = inputs["question"]
        # NOTE(review): the chain below re-invokes the retriever internally,
        # so documents are fetched twice per question — acceptable for an
        # evaluation harness, but worth caching if latency matters.
        docs = retriever.invoke(question)
        joined_context = "\n".join(doc.page_content for doc in docs)
        return {
            "question": question,
            "context": joined_context,
            "answer": rag_chain.invoke(question),
        }

    return _ask_question
코드 상세설명 참고 : https://chatgpt.com/share/67c93c06-5fcc-8009-8d3d-6198ea081972
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
class PDFRAG:
    """Minimal RAG pipeline over a single PDF file.

    Loads the PDF, splits it into chunks, embeds the chunks into an
    in-memory FAISS index, and exposes a retriever plus an LCEL
    question-answering chain.
    """

    def __init__(self, file_path: str, llm):
        # Path of the PDF to index and the LLM used for answering.
        self.file_path = file_path
        self.llm = llm

    def load_documents(self):
        """Load the PDF at ``self.file_path`` into page documents."""
        return PyMuPDFLoader(self.file_path).load()

    def split_documents(self, docs):
        """Chunk the loaded documents (300-char chunks, 50-char overlap)."""
        splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
        return splitter.split_documents(docs)

    def create_vectorstore(self, split_documents):
        """Embed the chunks and build an in-memory FAISS vector store."""
        embedder = OpenAIEmbeddings(model="text-embedding-3-small")
        return FAISS.from_documents(documents=split_documents, embedding=embedder)

    def create_retriever(self):
        """Run load -> split -> embed and return a retriever over the index.

        NOTE(review): each call re-reads and re-embeds the whole PDF;
        cache the vector store if this is invoked more than once.
        """
        pages = self.load_documents()
        chunks = self.split_documents(pages)
        store = self.create_vectorstore(chunks)
        return store.as_retriever()

    def create_chain(self, retriever):
        """Compose the retrieval-augmented QA chain for this PDF."""
        prompt = PromptTemplate.from_template(
            """You are an assistant for question-answering tasks. 
        Use the following pieces of retrieved context to answer the question. 
        If you don't know the answer, just say that you don't know. 
        #Context: 
        {context}
        #Question:
        {question}
        #Answer:"""
        )
        # The incoming question feeds both the retriever (as context)
        # and the prompt verbatim via RunnablePassthrough.
        return (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )
'언어 AI (NLP) > LLM & RAG & Agent' 카테고리의 다른 글

| 제목 | 날짜 |
|---|---|
| Langchain - pipe 활용 (0) | 2025.03.24 |
| 분산/병렬 처리 (DDP/MP) (0) | 2025.03.14 |
| RAGAS 를 이용한 RAG 평가 (0) | 2025.02.27 |
| RAG - AI Agent 예시 (0) | 2025.02.18 |
| RAG 중복문장 제거 (0) | 2025.02.15 |