"""
RAG chatbot app with speech-to-text (STT) support.
"""

import os
import time
import tempfile
from typing import List, Dict, Tuple, Any, Optional
import hashlib
import pickle
import json

from config import (
    PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL,
    STT_LANGUAGE, IS_HUGGINGFACE, OPENAI_API_KEY, USE_OPENAI
)
from optimized_document_processor import OptimizedDocumentProcessor
from vector_store import VectorStore
from langchain.schema import Document

from clova_stt import ClovaSTT

|
# Try to load the external RAG chain module; fall back to the built-in one below.
try:
    print("Attempting to load the RAG chain module...")
    from rag_chain import RAGChain

    EXTERNAL_RAG_AVAILABLE = True
    print("External RAG chain module loaded successfully")
except Exception as e:
    print(f"Failed to load the external RAG chain: {e}")
    print("The built-in RAG chain will be used instead.")
    EXTERNAL_RAG_AVAILABLE = False

RAG_CHAIN_AVAILABLE = True

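# The built-in chain below is written as a drop-in stand-in for the external
# rag_chain module: it is assumed to expose the same minimal interface, i.e. a
# constructor taking a vector store and a run(query) -> str method, so callers
# never need to know which implementation they received.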
|
try:
    from langchain_openai import ChatOpenAI
    from langchain_community.chat_models import ChatOllama
    from langchain.prompts import PromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough

    class BuiltinRAGChain:
        """Simple RAG chain embedded in the app."""

        def __init__(self, vector_store):
            print("Initializing built-in RAG chain...")
            self.vector_store = vector_store

            from config import OPENAI_API_KEY, LLM_MODEL, USE_OPENAI, IS_HUGGINGFACE

            # Default Ollama host; config may override it below.
            OLLAMA_HOST = "http://localhost:11434"
            if not IS_HUGGINGFACE and not USE_OPENAI:
                try:
                    from config import OLLAMA_HOST
                    print(f"Ollama host: {OLLAMA_HOST}")
                except ImportError:
                    print("OLLAMA_HOST not set in config; using the default")

            try:
                if USE_OPENAI or IS_HUGGINGFACE:
                    self.llm = ChatOpenAI(
                        model_name=LLM_MODEL,
                        temperature=0.2,
                        api_key=OPENAI_API_KEY,
                    )
                    print(f"Initialized OpenAI model: {LLM_MODEL}")
                else:
                    try:
                        self.llm = ChatOllama(
                            model=LLM_MODEL,
                            temperature=0.2,
                            base_url=OLLAMA_HOST,
                        )
                        print(f"Initialized Ollama model: {LLM_MODEL}")
                    except Exception as e:
                        print(f"Ollama initialization failed: {e}; falling back to OpenAI")
                        self.llm = ChatOpenAI(
                            model_name="gpt-3.5-turbo",
                            temperature=0.2,
                            api_key=OPENAI_API_KEY,
                        )

                template = """
                Answer the question accurately based on the information below.

                Question: {question}

                Reference information:
                {context}

                If the answer is not in the reference information, reply
                "The requested information could not be found in the provided documents."
                Keep the answer accurate and concise, explain the supporting evidence
                found in the reference information, and cite its sources.
                """

                self.prompt = PromptTemplate.from_template(template)

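                # LCEL composition: the dict step fans the incoming query out to
                # two keys - "context" is produced by _retrieve (a similarity
                # search formatted as numbered references), while
                # RunnablePassthrough() forwards the raw query as "question".
                # The filled prompt then flows through the LLM and a string parser.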
|
                self.chain = (
                    {"context": self._retrieve, "question": RunnablePassthrough()}
                    | self.prompt
                    | self.llm
                    | StrOutputParser()
                )
                print("Built-in RAG chain initialized")
            except Exception as e:
                print(f"LLM initialization failed: {e}")
                import traceback
                traceback.print_exc()
                raise

        def _retrieve(self, query):
            """Retrieve documents and format them into a context string."""
            try:
                from config import TOP_K_RETRIEVAL
                docs = self.vector_store.similarity_search(query, k=TOP_K_RETRIEVAL)

                context_parts = []
                for i, doc in enumerate(docs, 1):
                    source = doc.metadata.get("source", "unknown source")
                    page = doc.metadata.get("page", "")
                    source_info = f"{source}"
                    if page:
                        source_info += f" (page: {page})"

                    context_parts.append(f"[Reference {i}] - source: {source_info}\n{doc.page_content}\n")

                return "\n".join(context_parts)
            except Exception as e:
                print(f"Error during retrieval: {e}")
                import traceback
                traceback.print_exc()
                return "An error occurred while searching the documents."

        def run(self, query):
            """Process a query through the chain."""
            try:
                return self.chain.invoke(query)
            except Exception as e:
                print(f"RAG chain execution error: {e}")
                import traceback
                traceback.print_exc()
                return f"Error: {str(e)}"

    # Keep the external RAGChain when it was imported successfully; otherwise
    # expose the built-in implementation under the same name. (Previously the
    # built-in class unconditionally shadowed the imported one.)
    if not EXTERNAL_RAG_AVAILABLE:
        RAGChain = BuiltinRAGChain

except Exception as inner_e:
    print(f"Failed to define the built-in RAG chain: {inner_e}")
    import traceback
    traceback.print_exc()
    if not EXTERNAL_RAG_AVAILABLE:
        RAG_CHAIN_AVAILABLE = False

|
class AutoRAGChatApp:
    """
    RAG chatbot that automatically processes the PDF files in the documents
    folder and provides speech recognition.
    """

    def __init__(self):
        """
        Initialize the RAG chatbot application.
        """
        print("=" * 50)
        print("Initializing the voice-enabled RAG chatbot application")
        print("=" * 50)

        self.pdf_directory = PDF_DIRECTORY
        self.cache_directory = "cached_data"
        self.index_file = os.path.join(self.cache_directory, "file_index.json")
        self.chunks_dir = os.path.join(self.cache_directory, "chunks")
        self.vector_index_dir = os.path.join(self.cache_directory, "vector_index")

        os.makedirs(self.pdf_directory, exist_ok=True)
        os.makedirs(self.cache_directory, exist_ok=True)
        os.makedirs(self.chunks_dir, exist_ok=True)
        os.makedirs(self.vector_index_dir, exist_ok=True)

        print(f"PDF document directory: '{self.pdf_directory}'")
        print(f"Cache directory: '{self.cache_directory}'")

        self.document_processor = OptimizedDocumentProcessor(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )

        self.vector_store = VectorStore(use_milvus=False)

        self.file_index = self._load_file_index()

        self.documents = []
        self.processed_files = []
        self.is_initialized = False
        self.rag_chain = None

        self.stt_client = ClovaSTT()
        print("Speech-to-text (STT) support initialized.")

        print(f"RAG chain available: {RAG_CHAIN_AVAILABLE}")

        print("Starting automatic document loading and processing...")
        self.auto_process_documents()
        print(f"Initialization complete: {self.is_initialized}")
        print("=" * 50)

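    # On-disk cache layout produced by this class (for reference):
    #   cached_data/file_index.json   source path -> {hash, chunks_path,
    #                                 last_processed, chunks_count}
    #   cached_data/chunks/<md5>.pkl  pickled list of Document chunks per file
    #   cached_data/vector_index/     serialized vector index (the exact format
    #                                 depends on the VectorStore implementation)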
|
    def _fallback_response(self, query: str) -> str:
        """
        Generate a basic response when the RAG chain failed to initialize.

        Args:
            query: User question

        Returns:
            Basic response text
        """
        try:
            if self.vector_store and self.vector_store.vector_store:
                try:
                    docs = self.vector_store.similarity_search(query, k=3)
                    if docs:
                        context = "\n\n".join([doc.page_content for doc in docs])
                        response = f"""
An answer could not be generated because the RAG chain is not initialized.

However, related information was found in the documents:

{context}

Check the logs to resolve the RAG chain initialization problem.
"""
                        return response.strip()
                except Exception as e:
                    print(f"Vector search failed: {e}")

            return ("Sorry, the RAG chain is not initialized, so the question cannot "
                    "be answered. The technical issue is being worked on.")

        except Exception as e:
            print(f"Failed to generate the fallback response: {e}")
            return "A system error occurred. Please contact the administrator."

|
    def _process_pdf_file(self, file_path: str) -> List[Document]:
        """
        Process a PDF file, falling back to PyPDFLoader when docling fails.

        Args:
            file_path: Path of the PDF file to process

        Returns:
            List of processed document chunks
        """
        try:
            print(f"Trying docling: {file_path}")

            try:
                import signal

                def timeout_handler(signum, frame):
                    raise TimeoutError("docling processing timed out")

                # SIGALRM is POSIX-only (unavailable on Windows) and can only
                # be set from the main thread, so failures here are ignored.
                try:
                    signal.signal(signal.SIGALRM, timeout_handler)
                    signal.alarm(60)
                except Exception:
                    pass

                chunks = self.document_processor.process_pdf(file_path, use_docling=True)

                # Cancel the pending alarm once processing finished in time.
                try:
                    signal.alarm(0)
                except Exception:
                    pass

                return chunks

            except Exception as e:
                error_str = str(e)
                if "Invalid code point" in error_str or "RuntimeError" in error_str:
                    print(f"docling error (code point issue): {error_str}")
                else:
                    print(f"docling error: {error_str}")
                print("Falling back to PyPDFLoader.")

                try:
                    return self.document_processor.process_pdf(file_path, use_docling=False)
                except Exception as inner_e:
                    print(f"PyPDFLoader error: {inner_e}")
                    raise

        except Exception as e:
            print(f"Fatal error while processing the PDF: {e}")
            return []

|
    def _load_file_index(self) -> Dict[str, Dict[str, Any]]:
        """
        Load the file index.

        Returns:
            Mapping of file path -> metadata
        """
        if os.path.exists(self.index_file):
            try:
                with open(self.index_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Failed to load the index file: {e}")
                return {}
        return {}

    def _save_file_index(self) -> None:
        """
        Save the file index.
        """
        with open(self.index_file, 'w', encoding='utf-8') as f:
            json.dump(self.file_index, f, ensure_ascii=False, indent=2)

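    # The MD5 digest below is a content fingerprint for change detection only,
    # not a security measure; reading in 64 KB blocks keeps memory usage flat
    # even for large PDFs.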
|
    def _calculate_file_hash(self, file_path: str) -> str:
        """
        Compute a file hash.

        Args:
            file_path: File path

        Returns:
            MD5 hash value
        """
        hasher = hashlib.md5()
        with open(file_path, 'rb') as f:
            buf = f.read(65536)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(65536)
        return hasher.hexdigest()

|
    def _is_file_processed(self, file_path: str) -> bool:
        """
        Check whether a file has already been processed and is unchanged.

        Args:
            file_path: File path

        Returns:
            Whether the file counts as processed
        """
        if file_path not in self.file_index:
            return False

        current_hash = self._calculate_file_hash(file_path)

        if self.file_index[file_path]['hash'] != current_hash:
            print(f"File change detected: {file_path}")
            return False

        chunks_path = self.file_index[file_path]['chunks_path']
        if not os.path.exists(chunks_path):
            return False

        return True

    def _get_chunks_path(self, file_hash: str) -> str:
        """
        Build the chunk-file path.

        Args:
            file_hash: File hash value

        Returns:
            Chunk file path
        """
        return os.path.join(self.chunks_dir, f"{file_hash}.pkl")

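    # Chunks are cached with pickle for speed; pickle is only safe for data the
    # app wrote itself, so the cache directory must stay local and trusted.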
|
    def _save_chunks(self, file_path: str, chunks: List[Document]) -> None:
        """
        Save chunk data.

        Args:
            file_path: Source file path
            chunks: List of document chunks
        """
        file_hash = self._calculate_file_hash(file_path)
        chunks_path = self._get_chunks_path(file_hash)

        with open(chunks_path, 'wb') as f:
            pickle.dump(chunks, f)

        self.file_index[file_path] = {
            'hash': file_hash,
            'chunks_path': chunks_path,
            'last_processed': time.time(),
            'chunks_count': len(chunks)
        }

        self._save_file_index()

        print(f"Chunks saved: {file_path} ({len(chunks)} chunks)")

    def _load_chunks(self, file_path: str) -> List[Document]:
        """
        Load saved chunk data.

        Args:
            file_path: File path

        Returns:
            List of document chunks
        """
        chunks_path = self.file_index[file_path]['chunks_path']
        with open(chunks_path, 'rb') as f:
            chunks = pickle.load(f)

        print(f"Chunks loaded: {file_path} ({len(chunks)} chunks)")
        return chunks

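    # auto_process_documents is the heart of the pipeline: scan the PDF folder,
    # reuse cached chunks for unchanged files, process new/changed ones, then
    # load, update, or rebuild the vector index and initialize the RAG chain.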
|
    def auto_process_documents(self) -> str:
        """
        Automatically process the PDF files in the documents folder.

        Returns:
            Processing result message
        """
        try:
            start_time = time.time()

            pdf_files = []
            for filename in os.listdir(self.pdf_directory):
                if filename.lower().endswith('.pdf'):
                    pdf_files.append(os.path.join(self.pdf_directory, filename))

            if not pdf_files:
                print(f"No PDF files in the '{self.pdf_directory}' folder.")
                return f"No PDF files in the '{self.pdf_directory}' folder."

            print(f"Found {len(pdf_files)} PDF file(s)")

            new_files = []
            updated_files = []
            cached_files = []
            failed_files = []
            all_chunks = []

            for file_path in pdf_files:
                if self._is_file_processed(file_path):
                    chunks = self._load_chunks(file_path)
                    all_chunks.extend(chunks)
                    cached_files.append(file_path)
                    self.processed_files.append(os.path.basename(file_path))
                else:
                    print(f"Processing: {file_path}")

                    try:
                        chunks = self._process_pdf_file(file_path)

                        if chunks:
                            self._save_chunks(file_path, chunks)

                            all_chunks.extend(chunks)
                            if file_path in self.file_index:
                                updated_files.append(file_path)
                            else:
                                new_files.append(file_path)

                            self.processed_files.append(os.path.basename(file_path))
                        else:
                            print(f"Failed to process '{file_path}': no chunks extracted")
                            failed_files.append(file_path)
                    except Exception as e:
                        print(f"Error while processing '{file_path}': {e}")
                        failed_files.append(file_path)

            self.documents = all_chunks

            processing_time = time.time() - start_time
            print(f"Document processing finished: {len(all_chunks)} chunks, {processing_time:.2f}s")

            if os.path.exists(self.vector_index_dir) and any(os.listdir(self.vector_index_dir)):
                try:
                    print("Loading the saved vector index...")
                    self.vector_store.load_local(self.vector_index_dir)

                    if self.vector_store.vector_store is not None:
                        if new_files or updated_files:
                            print("Updating the vector index...")
                            self.vector_store.add_documents(self.documents)
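                            # NOTE: self.documents holds every chunk (cached and
                            # new alike), so re-adding them all may duplicate
                            # entries already in the loaded index; adding only
                            # the chunks of new/updated files would avoid that.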
|
                        print("Vector index loaded")
                    else:
                        print("The vector index loaded but is invalid; rebuilding.")
                        self.vector_store.create_or_load(self.documents)

                except Exception as e:
                    print(f"Failed to load the vector index, rebuilding: {e}")

                    import traceback
                    traceback.print_exc()

                    self.vector_store.create_or_load(self.documents)
            else:
                print("Creating a new vector index...")
                self.vector_store.create_or_load(self.documents)

            if self.vector_store and self.vector_store.vector_store is not None:
                try:
                    print(f"Saving the vector index: {self.vector_index_dir}")
                    self.vector_store.save_local(self.vector_index_dir)
                    print(f"Vector index saved: {self.vector_index_dir}")
                except Exception as e:
                    print(f"Failed to save the vector index: {e}")

                    import traceback
                    traceback.print_exc()
            else:
                print("The vector index is not initialized; skipping the save.")

            try:
                print("Initializing the RAG chain...")
                if RAG_CHAIN_AVAILABLE:
                    print("RAG_CHAIN_AVAILABLE=True, proceeding")

                    self.rag_chain = RAGChain(self.vector_store)
                    print("RAG chain object created")

                    try:
                        test_response = self.rag_chain.run("This is a test query.")
                        print(f"RAG chain test succeeded: response length {len(test_response)}")
                        self.is_initialized = True
                    except Exception as test_e:
                        print(f"RAG chain test failed: {test_e}")
                        import traceback
                        traceback.print_exc()
                        self.is_initialized = False
                        return f"RAG chain test failed: {test_e}"
                else:
                    print("RAG_CHAIN_AVAILABLE=False, cannot initialize")
                    self.is_initialized = False
                    return "The RAG chain module is unavailable."

                print(f"RAG chain initialization result: is_initialized={self.is_initialized}")
                if self.is_initialized:
                    print("RAG chain initialized successfully!")
                else:
                    print("RAG chain initialization failed without raising an exception.")
                    return "RAG chain initialization failed: unknown cause"

            except Exception as e:
                print(f"Exception during RAG chain initialization: {e}")
                import traceback
                traceback.print_exc()
                self.is_initialized = False
                return f"RAG chain initialization failed: {e}"

            total_time = time.time() - start_time

            status_message = (
                f"Document processing complete!\n"
                f"- Processed files: {len(self.processed_files)}\n"
                f"- Cached files: {len(cached_files)}\n"
                f"- New files: {len(new_files)}\n"
                f"- Updated files: {len(updated_files)}\n"
                f"- Failed files: {len(failed_files)}\n"
                f"- Total chunks: {len(self.documents)}\n"
                f"- Processing time: {total_time:.2f}s\n"
                f"- RAG chain initialization: {'success' if self.is_initialized else 'failure'}\n"
                f"Ready for questions!"
            )

            print(status_message)
            return status_message

        except Exception as e:
            self.is_initialized = False
            error_message = f"Error while processing documents: {str(e)}"
            print(error_message)
            import traceback
            traceback.print_exc()
            return error_message

|
    def reset_cache(self) -> str:
        """
        Reset the cache.

        Returns:
            Result message
        """
        try:
            for filename in os.listdir(self.chunks_dir):
                file_path = os.path.join(self.chunks_dir, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)

            self.file_index = {}
            self._save_file_index()

            for filename in os.listdir(self.vector_index_dir):
                file_path = os.path.join(self.vector_index_dir, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)

            self.documents = []
            self.processed_files = []
            self.is_initialized = False

            return "The cache has been reset. All documents will be reprocessed on the next run."
        except Exception as e:
            return f"Error while resetting the cache: {str(e)}"

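    # Both query handlers below append Gradio-style history entries: each entry
    # is a [user_message, assistant_message] pair, matching the pair-based
    # (pre-"messages") gr.Chatbot format assumed in launch_app.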
|
    def process_query(self, query: str, chat_history: List[List[str]]) -> Tuple[str, List[List[str]]]:
        """
        Process a user query.

        Args:
            query: User question
            chat_history: Conversation history (list format)

        Returns:
            Response and updated conversation history
        """
        if not query:
            return "", chat_history

        if not self.is_initialized or self.rag_chain is None:
            print("Text query: document loading and initialization required.")
            response = "Document loading is not initialized. Attempting automatic loading."
            new_history = list(chat_history)
            new_history.append([query, response])

            try:
                init_result = self.auto_process_documents()
                print(f"[DEBUG] After auto load: is_initialized = {self.is_initialized}, RAG chain present = {self.rag_chain is not None}")

                if not self.is_initialized or self.rag_chain is None:
                    response = f"Documents could not be loaded. Check that PDF files exist in the 'documents' folder.\nError info: {init_result}"
                    new_history = list(chat_history)
                    new_history.append([query, response])
                    return "", new_history
            except Exception as e:
                response = f"Error while loading documents: {str(e)}"
                new_history = list(chat_history)
                new_history.append([query, response])
                return "", new_history
        else:
            print("Text query: documents are already loaded.")

        try:
            start_time = time.time()
            print(f"Running the RAG chain: query = '{query}'")

            if self.is_initialized and self.rag_chain is not None:
                response = self.rag_chain.run(query)
            else:
                print("RAG chain not initialized: using the fallback response")
                response = self._fallback_response(query)

            end_time = time.time()

            query_time = end_time - start_time
            print(f"Query processing time: {query_time:.2f}s")
            print(f"Response: {response[:100]}..." if len(response) > 100 else f"Response: {response}")

            new_history = list(chat_history)
            new_history.append([query, response])
            return "", new_history
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(f"RAG chain execution error: {error_msg}")
            import traceback
            traceback.print_exc()

            new_history = list(chat_history)
            new_history.append([query, error_msg])
            return "", new_history

|
    def process_voice_query(self, audio, chat_history: List[List[str]]) -> List[List[str]]:
        """
        Process a voice query.

        Args:
            audio: Recorded audio data (tuple of (sample_rate, numpy samples))
            chat_history: Conversation history

        Returns:
            Updated conversation history
        """
        if audio is None:
            return chat_history

        try:
            import numpy as np
            import scipy.io.wavfile as wav

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name

            # Gradio delivers numpy audio as (sample_rate, samples); write it
            # out as 16-bit PCM WAV for the STT client.
            sr, data = audio
            wav.write(temp_path, sr, data.astype(np.int16))

            print(f"[STT] Temporary audio file created: {temp_path}")

            result = self.stt_client.recognize_file(temp_path, language=STT_LANGUAGE)

            try:
                os.unlink(temp_path)
                print("[STT] Temporary audio file deleted")
            except Exception as e:
                print(f"[STT] Failed to delete the temporary file: {e}")

            if "error" in result:
                error_msg = f"Speech recognition error: {result.get('error')}"
                print(f"[STT] {error_msg}")
                new_history = list(chat_history)
                new_history.append(["Voice message", error_msg])
                return new_history

            recognized_text = result.get("text", "")
            if not recognized_text:
                error_msg = "The speech could not be recognized. Please try again."
                print("[STT] No recognized text")
                new_history = list(chat_history)
                new_history.append(["Voice message", error_msg])
                return new_history

            print(f"[STT] Recognized text: {recognized_text}")

            query = f"🎤 {recognized_text}"
            print(f"[DEBUG] is_initialized = {self.is_initialized}, RAG chain present = {self.rag_chain is not None}")

            if not self.is_initialized or self.rag_chain is None:
                print("Voice query: document loading and initialization required.")
                response = "Document loading is not initialized. Attempting automatic loading."
                new_history = list(chat_history)
                new_history.append([query, response])

                try:
                    init_result = self.auto_process_documents()
                    print(f"[DEBUG] After auto load: is_initialized = {self.is_initialized}, RAG chain present = {self.rag_chain is not None}")

                    if not self.is_initialized or self.rag_chain is None:
                        response = f"Documents could not be loaded. Check that PDF files exist in the 'documents' folder.\nError info: {init_result}"
                        new_history = list(chat_history)
                        new_history.append([query, response])
                        return new_history
                except Exception as e:
                    response = f"Error while loading documents: {str(e)}"
                    new_history = list(chat_history)
                    new_history.append([query, response])
                    return new_history
            else:
                print("Voice query: documents are already loaded.")

            try:
                start_time = time.time()
                print(f"Running the RAG chain: query = '{query}'")

                if self.is_initialized and self.rag_chain is not None:
                    response = self.rag_chain.run(query)
                else:
                    print("RAG chain not initialized: using the fallback response")
                    response = self._fallback_response(query)

                end_time = time.time()

                query_time = end_time - start_time
                print(f"Query processing time: {query_time:.2f}s")
                print(f"Response: {response[:100]}..." if len(response) > 100 else f"Response: {response}")

                new_history = list(chat_history)
                new_history.append([query, response])
                return new_history
            except Exception as e:
                error_msg = f"Error: {str(e)}"
                print(f"RAG chain execution error: {error_msg}")
                import traceback
                traceback.print_exc()

                new_history = list(chat_history)
                new_history.append([query, error_msg])
                return new_history

        except Exception as e:
            error_msg = f"Error while processing the audio: {str(e)}"
            print(f"[STT] {error_msg}")
            import traceback
            traceback.print_exc()
            new_history = list(chat_history)
            new_history.append(["Voice message", error_msg])
            return new_history

|
    def launch_app(self) -> None:
        """
        Launch the Gradio app with speech recognition (improved automatic
        audio handling).
        """
        import gradio as gr
        import time

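        # The handler below is a generator: every yield pushes an intermediate
        # UI state (chat history, mic interactivity, status text, progress
        # text) to Gradio, so the user sees progress before the final answer.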
|
        def process_audio_auto(audio, chat_history):
            """Automatically run STT on the recording, then process the query."""
            if audio is None:
                # In a generator a bare return would drop these outputs, so
                # yield the unchanged state first.
                yield chat_history, gr.update(interactive=True), "Recordings are processed automatically", ""
                return

            processing_msg = "Processing audio..."

            try:
                import numpy as np
                import scipy.io.wavfile as wav

                yield chat_history, gr.update(interactive=False), processing_msg, "Converting speech to text..."

                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                    temp_path = temp_file.name

                sr, data = audio
                wav.write(temp_path, sr, data.astype(np.int16))

                print(f"[STT] Temporary audio file created: {temp_path}")

                result = self.stt_client.recognize_file(temp_path, language=STT_LANGUAGE)

                try:
                    os.unlink(temp_path)
                    print("[STT] Temporary audio file deleted")
                except Exception as e:
                    print(f"[STT] Failed to delete the temporary file: {e}")

                if "error" in result:
                    error_msg = f"Speech recognition error: {result.get('error')}"
                    print(f"[STT] {error_msg}")
                    new_history = list(chat_history)
                    new_history.append(["Voice message", error_msg])
                    yield new_history, gr.update(interactive=True), error_msg, ""
                    return

                recognized_text = result.get("text", "")
                if not recognized_text:
                    error_msg = "The speech could not be recognized. Please try again."
                    print("[STT] No recognized text")
                    new_history = list(chat_history)
                    new_history.append(["Voice message", error_msg])
                    yield new_history, gr.update(interactive=True), error_msg, ""
                    return

                print(f"[STT] Recognized text: {recognized_text}")

                yield chat_history, gr.update(
                    interactive=False), f"Recognized text: {recognized_text}\n\nGenerating a response...", "Generating a response..."

                query = f"🎤 {recognized_text}"
                print(f"[DEBUG] is_initialized = {self.is_initialized}, RAG chain present = {self.rag_chain is not None}")

                if not self.is_initialized or self.rag_chain is None:
                    print("Voice query: document loading and initialization required.")
                    response = "Document loading is not initialized. Attempting automatic loading."
                    new_history = list(chat_history)
                    new_history.append([query, response])

                    try:
                        init_result = self.auto_process_documents()
                        print(
                            f"[DEBUG] After auto load: is_initialized = {self.is_initialized}, RAG chain present = {self.rag_chain is not None}")

                        if not self.is_initialized or self.rag_chain is None:
                            response = f"Documents could not be loaded. Check that PDF files exist in the 'documents' folder.\nError info: {init_result}"
                            new_history = list(chat_history)
                            new_history.append([query, response])
                            yield new_history, gr.update(interactive=True), response, ""
                            return
                    except Exception as e:
                        response = f"Error while loading documents: {str(e)}"
                        new_history = list(chat_history)
                        new_history.append([query, response])
                        yield new_history, gr.update(interactive=True), response, ""
                        return
                else:
                    print("Voice query: documents are already loaded.")

                try:
                    start_time = time.time()
                    print(f"Running the RAG chain: query = '{query}'")

                    if self.is_initialized and self.rag_chain is not None:
                        response = self.rag_chain.run(query)
                    else:
                        print("RAG chain not initialized: using the fallback response")
                        response = self._fallback_response(query)

                    end_time = time.time()

                    query_time = end_time - start_time
                    print(f"Query processing time: {query_time:.2f}s")
                    print(f"Response: {response[:100]}..." if len(response) > 100 else f"Response: {response}")

                    new_history = list(chat_history)
                    new_history.append([query, response])

                    yield new_history, gr.update(interactive=True), f"Done: {recognized_text}", ""

                except Exception as e:
                    error_msg = f"Error: {str(e)}"
                    print(f"RAG chain execution error: {error_msg}")
                    import traceback
                    traceback.print_exc()

                    new_history = list(chat_history)
                    new_history.append([query, error_msg])
                    yield new_history, gr.update(interactive=True), error_msg, ""

            except Exception as e:
                error_msg = f"Error while processing the audio: {str(e)}"
                print(f"[STT] {error_msg}")
                import traceback
                traceback.print_exc()
                new_history = list(chat_history)
                new_history.append(["Voice message", error_msg])
                yield new_history, gr.update(interactive=True), error_msg, ""

|
        with gr.Blocks(title="Voice-enabled PDF document RAG chatbot") as app:
            gr.Markdown("# Voice-enabled PDF document RAG chatbot")
            gr.Markdown(f"* LLM model in use: **{LLM_MODEL}**")
            gr.Markdown(f"* PDF document folder: **{self.pdf_directory}**")
            gr.Markdown("* Naver Clova speech recognition API integration")

            with gr.Row():
                with gr.Column(scale=1):
                    status_box = gr.Textbox(
                        label="Document processing status",
                        value=f"Processed documents ({len(self.processed_files)}): {', '.join(self.processed_files)}",
                        lines=5,
                        interactive=False
                    )

                    refresh_button = gr.Button("Reload documents", variant="primary")
                    reset_button = gr.Button("Reset cache", variant="stop")

                    with gr.Accordion("Cache details", open=False):
                        file_info = ""
                        for file_path, info in self.file_index.items():
                            file_info += f"- {os.path.basename(file_path)}: {info['chunks_count']} chunks\n"

                        cache_info = gr.Textbox(
                            label="Cached file info",
                            value=file_info or "No cached files.",
                            lines=5,
                            interactive=False
                        )

                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=500,
                        show_copy_button=True
                    )

                    with gr.Tabs() as input_tabs:
                        with gr.Tab("Text input"):
                            with gr.Row():
                                query_box = gr.Textbox(
                                    label="Question",
                                    placeholder="Ask about the processed documents...",
                                    lines=2,
                                    scale=4
                                )
                                submit_btn = gr.Button("Send", variant="primary", scale=1)

                        with gr.Tab("Voice input"):
                            processing_status = gr.Textbox(
                                label="Processing status",
                                placeholder="Recordings are processed automatically",
                                lines=3,
                                interactive=False,
                                visible=True
                            )

                            progress_bar = gr.Textbox(
                                label="Progress",
                                value="",
                                visible=True
                            )

                            audio_input_auto = gr.Audio(
                                label="Microphone input (processed automatically after recording)",
                                sources=["microphone"],
                                type="numpy",
                                format="wav",
                                streaming=False
                            )

                    clear_chat_button = gr.Button("Clear conversation")

            refresh_button.click(
                fn=self.auto_process_documents,
                inputs=[],
                outputs=[status_box]
            )

            def reset_and_reprocess():
                # Clear the caches first, then rebuild and report the rebuild
                # status. (A lambda returning both results as a tuple would not
                # map cleanly onto the single status_box output.)
                self.reset_cache()
                return self.auto_process_documents()

            reset_button.click(
                fn=reset_and_reprocess,
                inputs=[],
                outputs=[status_box]
            )

            submit_btn.click(
                fn=self.process_query,
                inputs=[query_box, chatbot],
                outputs=[query_box, chatbot]
            )

            query_box.submit(
                fn=self.process_query,
                inputs=[query_box, chatbot],
                outputs=[query_box, chatbot]
            )

            audio_input_auto.stop_recording(
                fn=process_audio_auto,
                inputs=[audio_input_auto, chatbot],
                outputs=[chatbot, audio_input_auto, processing_status, progress_bar]
            )

            clear_chat_button.click(
                fn=lambda: [],
                outputs=[chatbot]
            )

        app.launch(share=False)

|
if __name__ == "__main__":
    app = AutoRAGChatApp()
    app.launch_app()