joortif committed on
Commit
af2fcf7
·
verified ·
1 Parent(s): 846e9a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -10,15 +10,20 @@ from huggingface_hub import InferenceClient
10
  from rerankers import Reranker
11
  import os
12
 
13
- loader = PyPDFLoader("Constitucion_española.pdf")
14
- documents = loader.load()
15
 
16
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
17
- docs_split = text_splitter.split_documents(documents)
 
18
 
19
- embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
20
 
21
- vectordb = Chroma.from_documents(docs_split, embedding_function)
 
 
 
22
 
23
  client = InferenceClient("google/flan-t5-base", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
24
 
@@ -51,14 +56,12 @@ def test_rag_reranking(query, ranker):
51
 
52
  print(f"📚 Contextos pasados al ranker: {len(context)}")
53
 
54
- # ✅ Corregido: pasar solo lista de strings
55
  context_strings = [str(c) for c in context]
56
  #print(help(Reranker.models.ColBERTRanker.rank))
57
  reranked = ranker.rank(query=query, docs=context_strings)
58
 
59
  print(f"🏅 Resultado del reranker: {reranked}")
60
 
61
- # ✅ Seguridad en el acceso al mejor contexto
62
  best_context = reranked[0].document.text
63
  print(f"🧠 Contexto elegido: {best_context[:500]}...")
64
 
@@ -67,8 +70,6 @@ def test_rag_reranking(query, ranker):
67
 
68
  return respuesta
69
 
70
-
71
-
72
  def responder_chat(message, history):
73
  respuesta = test_rag_reranking(message, ranker)
74
  return respuesta
 
10
  from rerankers import Reranker
11
  import os
12
 
13
+ embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
14
+ persist_directory = "db"
15
 
16
+ if not os.path.exists(persist_directory):
17
+ loader = PyPDFLoader("Constitucion_española.pdf")
18
+ documents = loader.load()
19
 
20
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
21
+ docs_split = text_splitter.split_documents(documents)
22
 
23
+ vectordb = Chroma.from_documents(docs_split, embedding_function, persist_directory=persist_directory)
24
+ vectordb.persist()
25
+ else:
26
+ vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
27
 
28
  client = InferenceClient("google/flan-t5-base", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
29
 
 
56
 
57
  print(f"📚 Contextos pasados al ranker: {len(context)}")
58
 
 
59
  context_strings = [str(c) for c in context]
60
  #print(help(Reranker.models.ColBERTRanker.rank))
61
  reranked = ranker.rank(query=query, docs=context_strings)
62
 
63
  print(f"🏅 Resultado del reranker: {reranked}")
64
 
 
65
  best_context = reranked[0].document.text
66
  print(f"🧠 Contexto elegido: {best_context[:500]}...")
67
 
 
70
 
71
  return respuesta
72
 
 
 
73
  def responder_chat(message, history):
74
  respuesta = test_rag_reranking(message, ranker)
75
  return respuesta