Spaces:
Runtime error
Runtime error
# vectordb_relank_law.py | |
import faiss | |
import numpy as np | |
import os | |
from chromadb import PersistentClient | |
from chromadb.utils import embedding_functions | |
from sentence_transformers import SentenceTransformer | |
from retriever.reranker import rerank_documents | |
from constants.embedding_models import embedding_models | |
# --- Chroma vector store configuration (v2) ---------------------------------
# Name of the collection to open inside the persistent store.
COLLECTION_NAME = "exam_all"
# On-disk location of the store; the relative path is resolved against the
# current working directory at import time.
CHROMA_PATH = os.path.abspath("data/index/exam_db")
# Pick which entry of the shared model list to use.
EMBEDDING_MODEL_NAME = embedding_models[1]

# 1. Load the embedding model (v2).
#    Alternative kept for reference:
#    SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# NOTE(review): embedding_model is not referenced in the visible part of this
# file; it is kept because other modules may import it — confirm.
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# 2. Build the embedding function handed to Chroma. It is created from the
#    model name, independently of `embedding_model` above.
embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDING_MODEL_NAME,
)

# 3. Open the persistent Chroma client and fetch the collection.
client = PersistentClient(path=CHROMA_PATH)
collection = client.get_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_fn,
)
# 4. Search function
def search_documents(query: str, top_k: int = 5):
    """Search the Chroma collection for *query* and return the reranked hits.

    The collection is asked for the ``top_k`` nearest documents, the document
    texts are passed through ``rerank_documents``, and every reranked document
    is paired back up with the metadata and distance Chroma returned for it.

    Args:
        query: Free-text search query.
        top_k: Number of results requested from Chroma and from the reranker.

    Returns:
        A list of ``(document, metadata, distance)`` tuples in the order
        produced by ``rerank_documents``. Empty when Chroma returns no hits.

    Raises:
        ValueError: If the reranker returns an item that is not one of the
            retrieved documents.
    """
    # NOTE(review): the literal below looks mis-encoded (UTF-8 text decoded
    # with the wrong codec); it is reproduced unchanged — confirm against the
    # original file before touching it.
    print(f"\nπ κ²μμ΄: '{query}'")
    results = collection.query(
        query_texts=[query],
        n_results=top_k,
        include=["documents", "metadatas", "distances"],
    )

    # A single query text was sent, so only the first result list is read.
    docs = results["documents"][0]
    metadatas = results["metadatas"][0]
    distances = results["distances"][0]

    # Nothing retrieved: skip the reranker instead of feeding it an empty list.
    if not docs:
        return []

    # Record every retrieval position of each document text *before* calling
    # the reranker. `docs.index(doc)` would always return the first match, so
    # duplicate texts would all receive the first copy's metadata/distance.
    positions = {}
    for idx, doc in enumerate(docs):
        positions.setdefault(doc, []).append(idx)

    # assumes rerank_documents returns document texts taken from `docs`
    # — TODO confirm against retriever.reranker
    reranked_docs = rerank_documents(query, docs, top_k=top_k)

    reranked_data = []
    for doc in reranked_docs:
        try:
            slots = positions[doc]
        except (KeyError, TypeError):
            # Same exception type that list.index() raised previously.
            raise ValueError(f"{doc!r} is not in list") from None
        # Hand out duplicate positions in retrieval order; once only one is
        # left, keep reusing it (matches the old behaviour for unique texts).
        idx = slots.pop(0) if len(slots) > 1 else slots[0]
        reranked_data.append((doc, metadatas[idx], distances[idx]))
    return reranked_data
# Disabled debug output: print the raw query results without reranking.
# for i, (doc, meta, dist) in enumerate(zip(
#     results['documents'][0],
#     results['metadatas'][0],
#     results['distances'][0]
# )):
#     print(f"\nResult {i+1} (similarity: {1 - dist:.2f})")
#     print(f"Document: {doc[:150]}...")
#     print("Metadata:")
#     print(meta)