PDF-RAG

Running

App Files Community

aiqtech committed on Jan 29

Commit

3f160bb

verified ·

1 Parent(s): 87dacef

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -219

app.py DELETED Viewed

@@ -1,219 +0,0 @@
-import os
-from typing import List
-from chainlit.types import AskFileResponse
-from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
-from aimakerspace.openai_utils.prompts import (
-    UserRolePrompt,
-    SystemRolePrompt,
-    AssistantRolePrompt,
-)
-from aimakerspace.openai_utils.embedding import EmbeddingModel
-from aimakerspace.vectordatabase import VectorDatabase
-from aimakerspace.openai_utils.chatmodel import ChatOpenAI
-import chainlit as cl
-from chainlit import user_session
-from chainlit.element import Text
-system_template = """\
-Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
-system_role_prompt = SystemRolePrompt(system_template)
-user_prompt_template = """\
-Context:
-{context}
-Question:
-{question}
-"""
-user_role_prompt = UserRolePrompt(user_prompt_template)
-@cl.on_chat_start
-async def init_sidebar():
-    # 사이드바 헤더 꾸미기
-    await cl.Sidebar(
-        cl.Text(content="📁 **파일 업로드 섹션**", style="heading3"),
-        cl.FilePicker(
-            accept=[".pdf", ".txt"],
-            max_size_mb=2,
-            on_upload=handle_upload,
-            label="📤 PDF/TXT 업로드",
-            description="최대 2MB 파일만 업로드 가능합니다"
-        ),
-        cl.Separator(),
-        cl.Text(content="🔍 **문서 분석 상태**", style="heading4"),
-        cl.ProgressRing(id="progress", visible=False),
-        cl.Text(id="status", content="대기 중...", style="caption"),
-        title="📚 문서 질의 시스템",
-        persistent=True  # 👈 사이드바 고정 설정
-    ).send()
-async def handle_upload(file: AskFileResponse):
-    # 진행 상태 업데이트
-    status = user_session.get("status")
-    progress = user_session.get("progress")
-    await status.update(content=f"🔍 {file.name} 분석 중...")
-    await progress.update(visible=True)
-    try:
-        # 파일 처리 로직
-        texts = process_file(file)
-        # 벡터 DB 구축
-        vector_db = VectorDatabase()
-        vector_db = await vector_db.abuild_from_list(texts)
-        # 세션에 저장
-        user_session.set("vector_db", vector_db)
-        # 상태 업데이트
-        await status.update(content=f"✅ {len(texts)}개 청크 처리 완료!")
-        await progress.update(visible=False)
-        # 파일 정보 요약 표시
-        await cl.Accordion(
-            title="📄 업로드 문서 정보",
-            content=[
-                cl.Text(f"파일명: {file.name}"),
-                cl.Text(f"크기: {file.size/1024:.1f}KB"),
-                cl.Text(f"분석 시간: {datetime.now().strftime('%H:%M:%S')}")
-            ],
-            expanded=False
-        ).send()
-    except Exception as e:
-        await cl.Error(
-            title="파일 처리 오류",
-            content=f"{str(e)}"
-        ).send()
-class RetrievalAugmentedQAPipeline:
-    def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:
-        self.llm = llm
-        self.vector_db_retriever = vector_db_retriever
-    async def arun_pipeline(self, user_query: str):
-        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
-        context_prompt = ""
-        for context in context_list:
-            context_prompt += context[0] + "\n"
-        formatted_system_prompt = system_role_prompt.create_message()
-        formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)
-        async def generate_response():
-            async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
-                yield chunk
-        return {"response": generate_response(), "context": context_list}
-text_splitter = CharacterTextSplitter()
-def process_file(file: AskFileResponse):
-    import tempfile
-    import shutil
-    print(f"Processing file: {file.name}")
-    # Create a temporary file with the correct extension
-    suffix = f".{file.name.split('.')[-1]}"
-    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
-        # Copy the uploaded file content to the temporary file
-        shutil.copyfile(file.path, temp_file.name)
-        print(f"Created temporary file at: {temp_file.name}")
-        # Create appropriate loader
-        if file.name.lower().endswith('.pdf'):
-            loader = PDFLoader(temp_file.name)
-        else:
-            loader = TextFileLoader(temp_file.name)
-        try:
-            # Load and process the documents
-            documents = loader.load_documents()
-            texts = text_splitter.split_texts(documents)
-            return texts
-        finally:
-            # Clean up the temporary file
-            try:
-                os.unlink(temp_file.name)
-            except Exception as e:
-                print(f"Error cleaning up temporary file: {e}")
-@cl.on_chat_start
-async def on_chat_start():
-    files = None
-    # Wait for the user to upload a file
-    while files == None:
-        files = await cl.AskFileMessage(
-            content="Please upload a Text or PDF file to begin!",
-            accept=["text/plain", "application/pdf"],
-            max_size_mb=2,
-            timeout=180,
-        ).send()
-    file = files[0]
-    msg = cl.Message(
-        content=f"Processing `{file.name}`..."
-    )
-    await msg.send()
-    # load the file
-    texts = process_file(file)
-    print(f"Processing {len(texts)} text chunks")
-    # Create a dict vector store
-    vector_db = VectorDatabase()
-    vector_db = await vector_db.abuild_from_list(texts)
-    chat_openai = ChatOpenAI()
-    # Create a chain
-    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
-        vector_db_retriever=vector_db,
-        llm=chat_openai
-    )
-    # Let the user know that the system is ready
-    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
-    await msg.update()
-    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
-@cl.on_message
-async def main(message):
-    chain = cl.user_session.get("chain")
-    # 응답 스타일 개선
-    msg = cl.Message(
-        content="",
-        actions=[
-            cl.Action(name="source", value="📑 소스 보기"),
-            cl.Action(name="feedback", value="💬 피드백 남기기")
-        ]
-    )
-    async for token in result["response"]:
-        await msg.stream_token(token, is_final=False)
-    # 최종 메시지 포맷팅
-    final_content = f"""
-    🧠 **AI 분석 결과**
-    {msg.content}
-    📌 참조 문장:
-    {chr(10).join([f'- {ctx[0][:50]}...' for ctx in result['context']])}
-    """
-    await msg.update(content=final_content)