Spaces:
Paused
Paused
""" | |
๊ฐ์ ๋ ๋ฒกํฐ ์คํ ์ด ๋ชจ๋ - Milvus ์ค์ ์ต์ ํ ๋ฐ ์์ธ ์ฒ๋ฆฌ ๊ฐํ | |
""" | |
import os | |
import logging | |
from typing import List, Dict, Any, Optional | |
import uuid | |
from langchain.schema import Document | |
# Logging setup
logger = logging.getLogger("VectorStore") | |
# Exception classes raised by the vector store
class VectorStoreError(Exception):
    """Common base class for every error raised by this vector store module.

    Catching this type handles all of the more specific errors below in one
    ``except`` clause.
    """


class VectorStoreInitError(VectorStoreError):
    """Raised when a vector store backend cannot be initialized."""


class EmbeddingModelError(VectorStoreError):
    """Raised when the embedding model cannot be initialized."""


class DocumentIndexError(VectorStoreError):
    """Raised when indexing documents fails."""


class VectorSearchError(VectorStoreError):
    """Raised when a vector search fails."""


class PersistenceError(VectorStoreError):
    """Raised when saving or loading an index fails."""
# Vector store imports
# Import the vector store and embedding classes, preferring the newer
# split-out LangChain packages and falling back to langchain_community.
# MODERN_IMPORTS records which of the two import paths succeeded.
try:
    # Newer package layout
    from langchain_milvus import Milvus
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings
    MODERN_IMPORTS = True
    logger.info("์ต์ langchain ํจํค์ง ์ํฌํธ ์ฑ๊ณต")
except ImportError:
    try:
        # Older package layout
        from langchain_community.vectorstores import Milvus, FAISS
        from langchain_community.embeddings import HuggingFaceEmbeddings
        MODERN_IMPORTS = False
        logger.info("๋ ๊ฑฐ์ langchain_community ํจํค์ง ์ฌ์ฉ")
    except ImportError as e:
        logger.error(f"ํ์ ๋ฒกํฐ ์คํ ์ด ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ํฌํธํ ์ ์์ต๋๋ค: {e}")
        # Chain explicitly so the original ImportError is reported as the cause.
        raise VectorStoreInitError(f"ํ์ ๋ฒกํฐ ์คํ ์ด ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ํฌํธํ ์ ์์ต๋๋ค: {str(e)}") from e
from config import MILVUS_HOST, MILVUS_PORT, MILVUS_COLLECTION, EMBEDDING_MODEL | |
class VectorStore:
    """Vector store wrapper that uses Milvus, with FAISS as a local fallback.

    Attributes:
        use_milvus: True while Milvus is the active backend. It is switched
            to False whenever a Milvus operation fails and FAISS takes over.
        vector_store: The underlying LangChain vector store, or None until
            one is created or loaded.
        embeddings: The HuggingFace embedding model shared by both backends.
    """

    def __init__(self, use_milvus: bool = True):
        """Load the embedding model and record the preferred backend.

        Args:
            use_milvus: Use Milvus when True, FAISS when False.

        Raises:
            EmbeddingModelError: If the embedding model cannot be initialized.
        """
        self.use_milvus = use_milvus
        self.vector_store = None

        logger.info(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ ์ค: {EMBEDDING_MODEL}")
        model_kwargs = {
            "device": "cpu",
            # NOTE(review): lets the model repository run its own code while
            # loading (marked as required by the original author). Only use
            # with models you trust.
            "trust_remote_code": True,
        }
        encode_kwargs = {"normalize_embeddings": True}
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name=EMBEDDING_MODEL,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs,
            )
            logger.info(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ์ด๊ธฐํ ์๋ฃ: {EMBEDDING_MODEL}")
        except Exception as e:
            logger.error(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ์ด๊ธฐํ ์คํจ: {e}", exc_info=True)
            raise EmbeddingModelError(f"์๋ฒ ๋ฉ ๋ชจ๋ธ '{EMBEDDING_MODEL}' ์ด๊ธฐํ ์คํจ: {str(e)}") from e

    @staticmethod
    def _milvus_connection_args() -> Dict[str, Any]:
        """Return the connection arguments for the configured Milvus server."""
        return {
            "host": MILVUS_HOST,
            "port": MILVUS_PORT,
        }

    @staticmethod
    def _milvus_index_params() -> Dict[str, Any]:
        """Return the index parameters shared by every Milvus collection here."""
        return {
            "index_type": "FLAT",     # accuracy-first index choice
            "metric_type": "COSINE",  # embeddings are normalized (see __init__)
            "params": {},             # no extra parameters are passed for FLAT
        }

    def init_milvus(self) -> Milvus:
        """Connect to the configured Milvus collection.

        Returns:
            The Milvus vector store instance.

        Raises:
            VectorStoreInitError: If the Milvus store cannot be created.
        """
        try:
            logger.info(f"Milvus ์ฐ๊ฒฐ ์๋ ์ค: {MILVUS_HOST}:{MILVUS_PORT}")
            milvus_store = Milvus(
                embedding_function=self.embeddings,
                collection_name=MILVUS_COLLECTION,
                connection_args=self._milvus_connection_args(),
                index_params=self._milvus_index_params(),
            )
            logger.info(f"Milvus ์ฐ๊ฒฐ ์ฑ๊ณต: {MILVUS_COLLECTION}")
            return milvus_store
        except Exception as e:
            logger.error(f"Milvus ์ด๊ธฐํ ์คํจ: {e}", exc_info=True)
            raise VectorStoreInitError(f"Milvus ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ: {str(e)}") from e

    def init_faiss(self) -> FAISS:
        """Create an empty FAISS vector store (local fallback).

        Returns:
            A FAISS vector store instance containing no documents.

        Raises:
            VectorStoreInitError: If the FAISS store cannot be created.
        """
        try:
            logger.info("FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์ค")
            # FAISS cannot be built from an empty batch because it sizes the
            # index from the first embedding. Seed the store with a single
            # throwaway entry, then delete it so the store starts out empty.
            seed_id = str(uuid.uuid4())
            faiss_store = FAISS.from_texts(["placeholder"], self.embeddings, ids=[seed_id])
            faiss_store.delete([seed_id])
            logger.info("FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์๋ฃ")
            return faiss_store
        except Exception as e:
            logger.error(f"FAISS ์ด๊ธฐํ ์คํจ: {e}", exc_info=True)
            raise VectorStoreInitError(f"FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ: {str(e)}") from e

    def create_or_load(self, documents: Optional[List[Document]] = None) -> Any:
        """Create a vector store from documents, or load/create an empty one.

        With Milvus active and documents given, the collection is rebuilt
        (the old collection is dropped). If any Milvus step fails, the store
        falls back to FAISS and ``use_milvus`` is set to False.

        Args:
            documents: Documents to index. When None or empty, the existing
                Milvus collection is loaded, or an empty FAISS store is made.

        Returns:
            The active vector store instance.

        Raises:
            DocumentIndexError: If indexing the documents fails on every
                available backend.
            VectorStoreInitError: If no backend can be initialized.
        """
        if self.use_milvus and documents:
            # Documents supplied: rebuild the Milvus collection from them.
            try:
                logger.info(f"Milvus ์ปฌ๋ ์ ์์ฑ ์ค: {MILVUS_COLLECTION} (๊ธฐ์กด ์ปฌ๋ ์ ์ญ์ )")
                self.vector_store = Milvus.from_documents(
                    documents=documents,
                    embedding=self.embeddings,
                    collection_name=MILVUS_COLLECTION,
                    connection_args=self._milvus_connection_args(),
                    index_params=self._milvus_index_params(),
                    drop_old=True,  # drop the existing collection (rebuild)
                )
                logger.info(f"Milvus ์ปฌ๋ ์ ์์ฑ ์๋ฃ: {len(documents)}๊ฐ ๋ฌธ์ ์ธ๋ฑ์ฑ๋จ")
            except Exception as e:
                logger.error(f"Milvus ์ปฌ๋ ์ ์์ฑ ์คํจ: {e}", exc_info=True)
                # Fall back to FAISS for this and all later operations.
                logger.warning("Milvus ์คํจ๋ก FAISS๋ก ๋์ฒดํฉ๋๋ค")
                self.use_milvus = False
                try:
                    self.vector_store = FAISS.from_documents(documents, self.embeddings)
                    logger.info(f"FAISS๋ก ๋์ฒด ์ฑ๊ณต: {len(documents)}๊ฐ ๋ฌธ์ ์ธ๋ฑ์ฑ๋จ")
                except Exception as faiss_err:
                    logger.error(f"FAISS ๋์ฒด ์คํจ: {faiss_err}", exc_info=True)
                    raise DocumentIndexError(f"๋ฌธ์ ์ธ๋ฑ์ฑ ์คํจ (Milvus ๋ฐ FAISS): {str(e)} / {str(faiss_err)}") from faiss_err
        elif self.use_milvus:
            # No documents: connect to the existing collection.
            try:
                self.vector_store = self.init_milvus()
            except VectorStoreInitError as e:
                logger.error(f"Milvus ์ปฌ๋ ์ ๋ก๋ ์คํจ: {e}")
                # Fall back to FAISS for this and all later operations.
                logger.warning("Milvus ์คํจ๋ก FAISS๋ก ๋์ฒดํฉ๋๋ค")
                self.use_milvus = False
                try:
                    self.vector_store = self.init_faiss()
                except VectorStoreInitError as faiss_err:
                    logger.error(f"FAISS ๋์ฒด ์คํจ: {faiss_err}", exc_info=True)
                    raise VectorStoreInitError(f"๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ (Milvus ๋ฐ FAISS): {str(e)} / {str(faiss_err)}") from faiss_err
        elif documents:
            # FAISS backend with documents to index.
            try:
                logger.info(f"FAISS ์ธ๋ฑ์ค ์์ฑ ์ค: {len(documents)}๊ฐ ๋ฌธ์")
                self.vector_store = FAISS.from_documents(documents, self.embeddings)
                logger.info("FAISS ์ธ๋ฑ์ค ์์ฑ ์๋ฃ")
            except Exception as e:
                logger.error(f"FAISS ์ธ๋ฑ์ค ์์ฑ ์คํจ: {e}", exc_info=True)
                raise DocumentIndexError(f"FAISS ๋ฌธ์ ์ธ๋ฑ์ฑ ์คํจ: {str(e)}") from e
        else:
            # FAISS backend, nothing to index; init_faiss logs its own failures.
            self.vector_store = self.init_faiss()
        return self.vector_store

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to the vector store, creating the store if needed.

        Args:
            documents: Documents to add. An empty list is a logged no-op.

        Raises:
            DocumentIndexError: If the documents cannot be added.
        """
        if not documents:
            logger.warning("์ถ๊ฐํ ๋ฌธ์๊ฐ ์์ต๋๋ค")
            return
        try:
            if self.vector_store is None:
                # NOTE(review): with Milvus active, create_or_load() indexes
                # with drop_old=True, so an existing collection is replaced
                # rather than appended to. Confirm this is intended.
                logger.info("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค. ์ ๋ฒกํฐ ์คํ ์ด๋ฅผ ์์ฑํฉ๋๋ค.")
                self.create_or_load(documents)
            else:
                logger.info(f"{len(documents)}๊ฐ ๋ฌธ์๋ฅผ ๊ธฐ์กด ๋ฒกํฐ ์คํ ์ด์ ์ถ๊ฐํฉ๋๋ค")
                self.vector_store.add_documents(documents)
                logger.info(f"{len(documents)}๊ฐ ๋ฌธ์ ์ถ๊ฐ ์๋ฃ")
        except Exception as e:
            logger.error(f"๋ฌธ์ ์ถ๊ฐ ์คํจ: {e}", exc_info=True)
            raise DocumentIndexError(f"๋ฒกํฐ ์คํ ์ด์ ๋ฌธ์ ์ถ๊ฐ ์คํจ: {str(e)}") from e

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Run a vector similarity search.

        Args:
            query: Search query. A blank query returns an empty list.
            k: Number of results to request.

        Returns:
            The documents returned by the underlying store's search.

        Raises:
            VectorSearchError: If the store is not initialized or the search
                itself fails.
        """
        if not query or not query.strip():
            logger.warning("๋น ์ฟผ๋ฆฌ๋ก ๊ฒ์ ์๋")
            return []
        if self.vector_store is None:
            logger.error("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค")
            raise VectorSearchError("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค")
        try:
            # Only the first 50 characters of the query are logged.
            logger.info(f"๊ฒ์ ์ฟผ๋ฆฌ ์คํ: '{query[:50]}{'...' if len(query) > 50 else ''}', ์์ {k}๊ฐ ๊ฒฐ๊ณผ ์์ฒญ")
            results = self.vector_store.similarity_search(query, k=k)
            logger.info(f"๊ฒ์ ์๋ฃ: {len(results)}๊ฐ ๊ฒฐ๊ณผ ์ฐพ์")
            return results
        except Exception as e:
            logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
            raise VectorSearchError(f"๋ฒกํฐ ๊ฒ์ ์คํจ: {str(e)}") from e

    def save_local(self, path: str = "faiss_index") -> bool:
        """Save the FAISS index to disk (a no-op when Milvus is active).

        Args:
            path: Destination path for the index.

        Returns:
            True on success, including the Milvus no-op case.

        Raises:
            PersistenceError: If the store is not initialized or saving fails.
        """
        if self.vector_store is None:
            logger.error("์ ์ฅํ ๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค")
            raise PersistenceError("์ ์ฅํ ๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค")
        if self.use_milvus:
            # Only FAISS is persisted locally.
            logger.info("Milvus๋ ๋ก์ปฌ ์ ์ฅ์ด ํ์ํ์ง ์์ต๋๋ค")
            return True
        try:
            # Ensure the parent directory exists; for a bare name with no
            # directory component, create the path itself.
            target_dir = os.path.dirname(path) or path
            os.makedirs(target_dir, exist_ok=True)
            self.vector_store.save_local(path)
            logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก์ปฌ ์ ์ฅ ์๋ฃ: {path}")
            return True
        except Exception as e:
            logger.error(f"FAISS ์ธ๋ฑ์ค ์ ์ฅ ์คํจ: {e}", exc_info=True)
            raise PersistenceError(f"๋ฒกํฐ ์ธ๋ฑ์ค ์ ์ฅ ์คํจ: {str(e)}") from e

    def load_local(self, path: str = "faiss_index") -> bool:
        """Load the FAISS index from disk, or connect to Milvus when active.

        When Milvus is active, this connects to Milvus instead of reading
        from disk. If that connection fails, ``use_milvus`` is set to False
        and the FAISS index at ``path`` is loaded.

        Args:
            path: Location of the saved FAISS index.

        Returns:
            True if Milvus was connected or the FAISS index was loaded.
            False if loading failed and a fresh empty FAISS store was created
            in its place.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            PersistenceError: If FAISS reports a missing index file.
            VectorStoreInitError: If the empty replacement store cannot be
                created after a failed load.
        """
        if self.use_milvus:
            logger.info("Milvus ์ฌ์ฉ ์ค์ด๋ฏ๋ก ๋ก์ปฌ ๋ก๋๋ฅผ ๊ฑด๋๋๋๋ค")
            try:
                # Verify the Milvus connection.
                self.vector_store = self.init_milvus()
                return True
            except Exception as e:
                logger.error(f"Milvus ์ฐ๊ฒฐ ์คํจ, FAISS๋ก ๋์ฒด: {e}")
                self.use_milvus = False
                # Continue below with FAISS.
        if not os.path.exists(path):
            logger.warning(f"์ธ๋ฑ์ค ๊ฒฝ๋ก๊ฐ ์กด์ฌํ์ง ์์: {path}")
            raise FileNotFoundError(f"๋ฒกํฐ ์ธ๋ฑ์ค ๊ฒฝ๋ก๊ฐ ์กด์ฌํ์ง ์์: {path}")
        try:
            logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์ค: {path}")
            # SECURITY: FAISS.load_local unpickles data from `path`, and
            # unpickling can execute arbitrary code. Only load index files
            # that this application wrote itself.
            self.vector_store = FAISS.load_local(
                path,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
            logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์๋ฃ: {path}")
            return True
        except FileNotFoundError as e:
            logger.error(f"FAISS ์ธ๋ฑ์ค ํ์ผ์ ์ฐพ์ ์ ์์: {e}")
            raise PersistenceError(f"๋ฒกํฐ ์ธ๋ฑ์ค ํ์ผ์ ์ฐพ์ ์ ์์: {str(e)}") from e
        except Exception as e:
            # exc_info=True already records the full traceback.
            logger.error(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์คํจ: {e}", exc_info=True)
            # Start over with a fresh empty index instead of failing.
            logger.warning("์ธ๋ฑ์ค ๋ก๋ ์คํจ๋ก ์ FAISS ์ธ๋ฑ์ค ์ด๊ธฐํ")
            self.vector_store = self.init_faiss()
            return False