Update app.py
app.py
CHANGED
@@ -16,21 +16,38 @@ os.getenv("GROQ_API_KEY")

 css_style = """
 <style>
+    .step-number {
+        font-size: 24px;
+        font-weight: bold;
+        color: #4CAF50;
+    }
+    .step-text {
+        font-size: 18px;
+        color: #555;
+    }
     button {
-        height:
-        width:
-        font-size:
-        background-color: #
-        color:
-        border: none;
-        border-radius: 5px;
-        cursor: pointer;
+        height: 35px;
+        width: 120px;
+        font-size: 14px;
+        background-color: #4CAF50;
+        color: white;
+        border: none;
+        border-radius: 5px;
+        cursor: pointer;
+    }
+    button:hover {
+        background-color: #45a049;
+    }
+    .custom-input {
+        font-size: 16px;
+        padding: 10px;
+        border-radius: 5px;
+        border: 1px solid #ccc;
     }
 </style>
 """

 def get_pdf_text(pdf_docs):
-    # Extract text from the uploaded files
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
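The restyled rules above only take effect because main() injects css_style into the page as raw HTML. A minimal standalone sketch of that mechanism (the selector and colors are illustrative, borrowed from the rules added in this hunk):

    import streamlit as st

    css_style = """
    <style>
        button { background-color: #4CAF50; color: white; border-radius: 5px; }
        button:hover { background-color: #45a049; }
    </style>
    """

    # unsafe_allow_html=True lets the <style> tag through st.markdown,
    # so every rendered <button> on the page picks up these rules.
    st.markdown(css_style, unsafe_allow_html=True)
    st.button("Demo")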
@@ -39,19 +56,16 @@ def get_pdf_text(pdf_docs):
     return text

 def get_text_chunks(text):
-    # Split the text into chunks
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
     chunks = text_splitter.split_text(text)
     return chunks

 def get_vector_store(text_chunks):
-    # Build a FAISS vector store from the chunks
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")

 def get_conversational_chain():
-    # Give the model an initial prompt
     prompt_template = """
     Responde la pregunta en español de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
     el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
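get_text_chunks and get_vector_store implement the indexing half of the RAG pipeline. A self-contained sketch of the round trip, assuming the langchain-community package layout used by recent LangChain releases (older versions export the same classes from langchain.*):

    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    text = "texto extraído del PDF ... " * 500  # stand-in for get_pdf_text output

    # 5000-character chunks with 500 characters of overlap, as above
    chunks = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500).split_text(text)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    FAISS.from_texts(chunks, embedding=embeddings).save_local("faiss_index")

    # user_input() later reloads the index; the flag opts in to pickle deserialization
    db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    print(db.similarity_search("¿De qué trata el documento?")[0].page_content[:200])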
@@ -61,7 +75,6 @@ def get_conversational_chain():
     {question}
     Respuesta:
     """
-    # Set up the model
     model = ChatGroq(
         temperature=0.3,
         model_name="deepseek-r1-distill-llama-70b",
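The lines elided between these hunks build the PromptTemplate that load_qa_chain consumes. A sketch of the full wiring, assuming the langchain_groq client and a minimal illustrative template (the app's real template is the Spanish one shown above, and its exact PromptTemplate construction is not visible in this diff):

    from langchain.chains.question_answering import load_qa_chain
    from langchain.prompts import PromptTemplate
    from langchain_groq import ChatGroq  # reads GROQ_API_KEY from the environment

    # Illustrative stand-in for the app's Spanish prompt_template
    template = "Contexto:\n{context}\n\nPregunta:\n{question}\nRespuesta:"
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    model = ChatGroq(temperature=0.3, model_name="deepseek-r1-distill-llama-70b")
    # chain_type="stuff" concatenates every retrieved document into the {context} slot
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)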
@@ -71,87 +84,70 @@ def get_conversational_chain():
     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
     return chain

-# Handling to capture the model's chain of thought
 def eliminar_texto_entre_tags(texto):
     patron = r'<think>.*?</think>'
     texto_limpio = re.sub(patron, '', texto, flags=re.DOTALL)
     return texto_limpio

 def user_input(user_question):
-    """Handle user queries by retrieving answers from the vector store."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-
     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
     docs = new_db.similarity_search(user_question)
-
     chain = get_conversational_chain()
-
     response = chain(
         {"input_documents": docs, "question": user_question},
         return_only_outputs=True
     )
-
-    # Debugging: print the original response
     original_response = response['output_text']
     print("Original Response:", original_response)
-
-    # Extract the thought process
     thought_process = ""
     if "<think>" in response['output_text'] and "</think>" in response['output_text']:
         thought_process_match = re.search(r"<think>(.*?)</think>", response['output_text'], re.DOTALL)
         if thought_process_match:
             thought_process = thought_process_match.group(1).strip()
-
-    # Strip the thought process from the main answer
     clean_response = eliminar_texto_entre_tags(original_response)
-
-    # Print the clean response, without the <think> </think> markers
     print("Cleaned Response:", clean_response)
-
-    # Show the model's thought process in the expander
     with st.expander("💭 Pensamiento del Modelo"):
         st.write(thought_process)
-
     st.markdown(f"### Respuesta:\n{clean_response}")

 def main():
-    """Main function to run the Streamlit app."""
     st.set_page_config(page_title="PDF Consultor 🔍", page_icon="🔍", layout="wide")
-
     st.title("PDF Consultor 🔍")
-
     st.markdown(css_style, unsafe_allow_html=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    st.sidebar.markdown('<p class="step-number">1️⃣</p> <p class="step-text">Subir archivo PDF</p>', unsafe_allow_html=True)
+    pdf_docs = st.sidebar.file_uploader(
+        "Subir archivo PDF",
+        accept_multiple_files=True,
+        type=["pdf"]
+    )
+
+    st.sidebar.markdown('<p class="step-number">2️⃣</p> <p class="step-text">Procesar el archivo</p>', unsafe_allow_html=True)
+    if st.sidebar.button("Procesar"):
+        with st.spinner("Procesando el archivo..."):
+            raw_text = get_pdf_text(pdf_docs)
+            text_chunks = get_text_chunks(raw_text)
+            get_vector_store(text_chunks)
+            st.sidebar.success("¡PDF procesado exitosamente!")
+
+    st.sidebar.markdown('<p class="step-number">3️⃣</p> <p class="step-text">Hacer una pregunta</p>', unsafe_allow_html=True)

     col1, col2, col3 = st.columns(3)
-
+
     with col1:
         if st.button("Resumen", key="resumen_button"):
             user_input("Realiza un resumen sobre los aspectos más relevantes comentados en el documento")
-
+
     with col2:
         if st.button("Entidad", key="entidad_button"):
             user_input("A qué entidad pertenece el contenido del documento?")
-
+
     with col3:
         if st.button("Fecha implantación", key="fecha_button"):
             user_input("En qué fecha se implantará el contenido del documento?")
-
-    user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?")
+
+    user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?", key="custom-input")

     if user_question:
         with st.spinner("Obteniendo tu respuesta..."):
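The deepseek-r1 distill model emits its chain of thought between <think> tags, which is why user_input splits the output in two: the expander shows the captured reasoning and the answer is rendered with the tags stripped. The tag handling above is self-contained and can be exercised directly:

    import re

    def eliminar_texto_entre_tags(texto):
        # re.DOTALL lets '.' cross newlines inside the <think> block
        return re.sub(r'<think>.*?</think>', '', texto, flags=re.DOTALL)

    salida = "<think>Primero localizo la fecha...\nluego verifico.</think>La fecha es el 1 de marzo."
    print(eliminar_texto_entre_tags(salida))  # -> "La fecha es el 1 de marzo."

    # the expander content comes from the complementary capture
    print(re.search(r"<think>(.*?)</think>", salida, re.DOTALL).group(1).strip())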
@@ -159,3 +155,4 @@ def main():

 if __name__ == "__main__":
     main()
+
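Taken together, the commit reorganizes main() around a numbered three-step sidebar: upload, process, ask. A condensed, runnable sketch of that flow, with process_pdfs as a hypothetical stand-in for the get_pdf_text -> get_text_chunks -> get_vector_store pipeline:

    import streamlit as st

    def process_pdfs(pdf_docs):
        ...  # hypothetical stand-in: extract text, chunk it, build the FAISS index

    st.sidebar.markdown('<p class="step-number">1️⃣</p> <p class="step-text">Subir archivo PDF</p>',
                        unsafe_allow_html=True)
    pdf_docs = st.sidebar.file_uploader("Subir archivo PDF", accept_multiple_files=True, type=["pdf"])

    st.sidebar.markdown('<p class="step-number">2️⃣</p> <p class="step-text">Procesar el archivo</p>',
                        unsafe_allow_html=True)
    if st.sidebar.button("Procesar"):
        with st.spinner("Procesando el archivo..."):
            process_pdfs(pdf_docs)
            st.sidebar.success("¡PDF procesado exitosamente!")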