Update app.py
app.py CHANGED
@@ -1,4 +1,11 @@
 import streamlit as st
+from huggingface_hub import snapshot_download
+from pathlib import Path
+from mistral_inference.model import Transformer
+from mistral_inference.generate import generate
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import UserMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from PyPDF2 import PdfReader
 from docx import Document
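A note on the new imports: the commit drops `login` from the `huggingface_hub` import, but `mistralai/Mistral-7B-Instruct-v0.3` is a gated repository on the Hub, so the `snapshot_download` call added in the next hunk still needs credentials. A minimal sketch, assuming the token is supplied through the standard `HF_TOKEN` environment variable (which `huggingface_hub` also reads on its own):

import os
from huggingface_hub import login

# Assumption: the Space exposes the token as the HF_TOKEN secret/env var.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)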
@@ -9,28 +16,22 @@ import torch
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
-from huggingface_hub import login, InferenceClient
 
-
+# Download and set up the model
+mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
+mistral_models_path.mkdir(parents=True, exist_ok=True)
+snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)
 
-#
-
-
-
-# Inference client configuration
-@st.cache_resource
-def load_inference_client():
-    client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
-    return client
-
-client = load_inference_client()
+# Set up the model and the tokenizer
+tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
+model = Transformer.from_folder(mistral_models_path)
 
 # Classification model configuration
 @st.cache_resource
 def load_classification_model():
-
-
-    return
+    tokenizer_cls = AutoTokenizer.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")
+    model_cls = AutoModelForSequenceClassification.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")
+    return model_cls, tokenizer_cls
 
 classification_model, classification_tokenizer = load_classification_model()
 
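Streamlit re-executes the whole script on every interaction, and this hunk moves the model setup to module level without the `@st.cache_resource` wrapper the old inference client had, so `Transformer.from_folder` would reload the 7B weights on each rerun. A sketch of the cached equivalent, using only the calls already in the hunk (the function name `load_mistral` is illustrative):

@st.cache_resource
def load_mistral():
    # Cache the download and the weight load across Streamlit reruns.
    models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
    models_path.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
        local_dir=models_path,
    )
    tokenizer = MistralTokenizer.from_file(f"{models_path}/tokenizer.model.v3")
    model = Transformer.from_folder(models_path)
    return model, tokenizer

model, tokenizer = load_mistral()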
@@ -60,29 +61,21 @@ def create_vector_store(docs):
     return vector_store
 
 def translate(text, target_language):
-
-
-
-
-
-
-    '''
-    messages = [{"role": "user", "content": template}]
-    response = client.chat(messages)
-    translated_text = response.generated_text
+    completion_request = ChatCompletionRequest(
+        messages=[UserMessage(content=f"Por favor, traduzca el siguiente documento al {target_language}:\n{text}\nAsegúrese de que la traducción sea precisa y conserve el significado original del documento.")]
+    )
+    tokens = tokenizer.encode_chat_completion(completion_request).tokens
+    out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+    translated_text = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
     return translated_text
 
 def summarize(text, length):
-
-
-
-
-
-
-    '''
-    messages = [{"role": "user", "content": template}]
-    response = client.chat(messages)
-    summarized_text = response.generated_text
+    completion_request = ChatCompletionRequest(
+        messages=[UserMessage(content=f"Por favor, haga un resumen {length} del siguiente documento:\n{text}\nAsegúrese de que el resumen sea conciso y conserve el significado original del documento.")]
+    )
+    tokens = tokenizer.encode_chat_completion(completion_request).tokens
+    out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+    summarized_text = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
     return summarized_text
 
 def handle_uploaded_file(uploaded_file):
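`translate` and `summarize` (and both generation branches in `main` further down) repeat the same encode, generate, decode round trip with only the prompt changing. A sketch of a shared helper built from exactly those calls (the name `mistral_chat` is hypothetical):

def mistral_chat(prompt, max_tokens=512):
    # Shared round trip: build the chat request, tokenize, generate, decode.
    request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
    tokens = tokenizer.encode_chat_completion(request).tokens
    out_tokens, _ = generate(
        [tokens], model, max_tokens=max_tokens, temperature=0.0,
        eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
    )
    return tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])

With that in place, each caller reduces to a single `mistral_chat(...)` call that only supplies its prompt.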
@@ -122,9 +115,6 @@ def main():
     st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
     st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
 
-    for msg in st.session_state.messages:
-        st.write(f"**{msg['role'].capitalize()}:** {msg['content']}")
-
     user_input = st.text_input("Introduce tu consulta:", "")
 
     if user_input:
@@ -148,10 +138,12 @@ def main():
             vector_store = create_vector_store(docs)
             search_docs = vector_store.similarity_search(user_input)
             context = " ".join([doc.page_content for doc in search_docs])
-
-
-
-
+            completion_request = ChatCompletionRequest(
+                messages=[UserMessage(content=f"Contexto: {context}\n\nPregunta: {user_input}")]
+            )
+            tokens = tokenizer.encode_chat_completion(completion_request).tokens
+            out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+            bot_response = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
         elif operation == "Resumir":
             if summary_length == "corto":
                 length = "de aproximadamente 50 palabras"
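The question-answering branch above joins every retrieved chunk into a single message. LangChain's FAISS `similarity_search` returns the top `k` chunks (`k` defaults to 4), so passing `k` explicitly and capping the joined text is a cheap guard against overrunning the model's context window. A sketch; the 8000-character budget is an arbitrary assumption, not part of this commit:

search_docs = vector_store.similarity_search(user_input, k=4)
context = " ".join(doc.page_content for doc in search_docs)
context = context[:8000]  # assumed budget for prompt + 512 generated tokens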
@@ -163,9 +155,12 @@ def main():
         elif operation == "Traducir":
             bot_response = translate(user_input, target_language)
         else:
-
-
-
+            completion_request = ChatCompletionRequest(
+                messages=[UserMessage(content=user_input)]
+            )
+            tokens = tokenizer.encode_chat_completion(completion_request).tokens
+            out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+            bot_response = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
 
         st.session_state.messages.append({"role": "assistant", "content": bot_response})
         st.write(f"**Assistant:** {bot_response}")
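One loose end: the diff fills in `load_classification_model` but never shows where `classification_model` is called; that happens in the parts of app.py this commit does not touch. For reference, a hedged sketch of the conventional transformers round trip for a sequence classifier like `mrm8488/legal-longformer-base-8192-spanish` (the function name and the integer label return are assumptions about the unseen code):

import torch

def classify(text):
    # Tokenize up to the longformer's 8192-token window and take the argmax label.
    inputs = classification_tokenizer(text, return_tensors="pt", truncation=True, max_length=8192)
    with torch.no_grad():
        logits = classification_model(**inputs).logits
    return int(logits.argmax(dim=-1).item())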