Update app.py
Browse files
app.py
CHANGED
@@ -10,15 +10,20 @@ from huggingface_hub import InferenceClient
|
|
10 |
from rerankers import Reranker
|
11 |
import os
|
12 |
|
13 |
-
|
14 |
-
|
15 |
|
16 |
-
|
17 |
-
|
|
|
18 |
|
19 |
-
|
|
|
20 |
|
21 |
-
vectordb = Chroma.from_documents(docs_split, embedding_function)
|
|
|
|
|
|
|
22 |
|
23 |
client = InferenceClient("google/flan-t5-base", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
24 |
|
@@ -51,14 +56,12 @@ def test_rag_reranking(query, ranker):
|
|
51 |
|
52 |
print(f"📚 Contextos pasados al ranker: {len(context)}")
|
53 |
|
54 |
-
# ✅ Corregido: pasar solo lista de strings
|
55 |
context_strings = [str(c) for c in context]
|
56 |
#print(help(Reranker.models.ColBERTRanker.rank))
|
57 |
reranked = ranker.rank(query=query, docs=context_strings)
|
58 |
|
59 |
print(f"🏅 Resultado del reranker: {reranked}")
|
60 |
|
61 |
-
# ✅ Seguridad en el acceso al mejor contexto
|
62 |
best_context = reranked[0].document.text
|
63 |
print(f"🧠 Contexto elegido: {best_context[:500]}...")
|
64 |
|
@@ -67,8 +70,6 @@ def test_rag_reranking(query, ranker):
|
|
67 |
|
68 |
return respuesta
|
69 |
|
70 |
-
|
71 |
-
|
72 |
def responder_chat(message, history):
|
73 |
respuesta = test_rag_reranking(message, ranker)
|
74 |
return respuesta
|
|
|
10 |
from rerankers import Reranker
|
11 |
import os
|
12 |
|
13 |
+
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
14 |
+
persist_directory = "db"
|
15 |
|
16 |
+
if not os.path.exists(persist_directory):
|
17 |
+
loader = PyPDFLoader("Constitucion_española.pdf")
|
18 |
+
documents = loader.load()
|
19 |
|
20 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
21 |
+
docs_split = text_splitter.split_documents(documents)
|
22 |
|
23 |
+
vectordb = Chroma.from_documents(docs_split, embedding_function, persist_directory=persist_directory)
|
24 |
+
vectordb.persist()
|
25 |
+
else:
|
26 |
+
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
|
27 |
|
28 |
client = InferenceClient("google/flan-t5-base", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
29 |
|
|
|
56 |
|
57 |
print(f"📚 Contextos pasados al ranker: {len(context)}")
|
58 |
|
|
|
59 |
context_strings = [str(c) for c in context]
|
60 |
#print(help(Reranker.models.ColBERTRanker.rank))
|
61 |
reranked = ranker.rank(query=query, docs=context_strings)
|
62 |
|
63 |
print(f"🏅 Resultado del reranker: {reranked}")
|
64 |
|
|
|
65 |
best_context = reranked[0].document.text
|
66 |
print(f"🧠 Contexto elegido: {best_context[:500]}...")
|
67 |
|
|
|
70 |
|
71 |
return respuesta
|
72 |
|
|
|
|
|
73 |
def responder_chat(message, history):
|
74 |
respuesta = test_rag_reranking(message, ranker)
|
75 |
return respuesta
|