manuelcozar55 committed on
Commit
a876827
verified
1 Parent(s): 62d5ecf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -46
app.py CHANGED
@@ -1,4 +1,11 @@
1
  import streamlit as st
 
 
 
 
 
 
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  from PyPDF2 import PdfReader
4
  from docx import Document
@@ -9,28 +16,22 @@ import torch
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from langchain.embeddings import HuggingFaceEmbeddings
11
  from langchain.vectorstores import FAISS
12
- from huggingface_hub import login, InferenceClient
13
 
14
- huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
 
 
 
15
 
16
- # Realizar el inicio de sesi贸n de Hugging Face solo si el token est谩 disponible
17
- if huggingface_token:
18
- login(token=huggingface_token)
19
-
20
- # Configuraci贸n del cliente de inferencia
21
- @st.cache_resource
22
- def load_inference_client():
23
- client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
24
- return client
25
-
26
- client = load_inference_client()
27
 
28
  # Configuraci贸n del modelo de clasificaci贸n
29
  @st.cache_resource
30
  def load_classification_model():
31
- tokenizer = AutoTokenizer.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")
32
- model = AutoModelForSequenceClassification.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")
33
- return model, tokenizer
34
 
35
  classification_model, classification_tokenizer = load_classification_model()
36
 
@@ -60,29 +61,21 @@ def create_vector_store(docs):
60
  return vector_store
61
 
62
  def translate(text, target_language):
63
- template = f'''
64
- Por favor, traduzca el siguiente documento al {target_language}:
65
- <document>
66
- {text}
67
- </document>
68
- Aseg煤rese de que la traducci贸n sea precisa y conserve el significado original del documento.
69
- '''
70
- messages = [{"role": "user", "content": template}]
71
- response = client.chat(messages)
72
- translated_text = response.generated_text
73
  return translated_text
74
 
75
  def summarize(text, length):
76
- template = f'''
77
- Por favor, haga un resumen {length} del siguiente documento:
78
- <document>
79
- {text}
80
- </document>
81
- Aseg煤rese de que el resumen sea conciso y conserve el significado original del documento.
82
- '''
83
- messages = [{"role": "user", "content": template}]
84
- response = client.chat(messages)
85
- summarized_text = response.generated_text
86
  return summarized_text
87
 
88
  def handle_uploaded_file(uploaded_file):
@@ -122,9 +115,6 @@ def main():
122
  st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
123
  st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
124
 
125
- for msg in st.session_state.messages:
126
- st.write(f"**{msg['role'].capitalize()}:** {msg['content']}")
127
-
128
  user_input = st.text_input("Introduce tu consulta:", "")
129
 
130
  if user_input:
@@ -148,10 +138,12 @@ def main():
148
  vector_store = create_vector_store(docs)
149
  search_docs = vector_store.similarity_search(user_input)
150
  context = " ".join([doc.page_content for doc in search_docs])
151
- prompt_with_context = f"Contexto: {context}\n\nPregunta: {user_input}"
152
- messages = [{"role": "user", "content": prompt_with_context}]
153
- response = client.chat(messages)
154
- bot_response = response.generated_text
 
 
155
  elif operation == "Resumir":
156
  if summary_length == "corto":
157
  length = "de aproximadamente 50 palabras"
@@ -163,9 +155,12 @@ def main():
163
  elif operation == "Traducir":
164
  bot_response = translate(user_input, target_language)
165
  else:
166
- messages = [{"role": "user", "content": user_input}]
167
- response = client.chat(messages)
168
- bot_response = response.generated_text
 
 
 
169
 
170
  st.session_state.messages.append({"role": "assistant", "content": bot_response})
171
  st.write(f"**Assistant:** {bot_response}")
 
1
  import streamlit as st
2
+ from huggingface_hub import snapshot_download
3
+ from pathlib import Path
4
+ from mistral_inference.model import Transformer
5
+ from mistral_inference.generate import generate
6
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
7
+ from mistral_common.protocol.instruct.messages import UserMessage
8
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  from PyPDF2 import PdfReader
11
  from docx import Document
 
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
17
  from langchain.embeddings import HuggingFaceEmbeddings
18
  from langchain.vectorstores import FAISS
 
19
 
20
# Download and configure the Mistral-7B-Instruct-v0.3 model.
# Kept as a module-level path so other code can reference the artifact location.
mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')


@st.cache_resource
def _load_mistral(models_path):
    """Download the Mistral artifacts (if missing) and build the inference objects.

    Wrapped in ``st.cache_resource`` because Streamlit re-executes the whole
    script on every widget interaction; without caching, the snapshot download
    check and the 7B weight load would run on every rerun.

    Args:
        models_path: local directory where the model files are stored.

    Returns:
        tuple: (MistralTokenizer, Transformer) ready for generation.
    """
    models_path.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
        local_dir=models_path,
    )
    mistral_tokenizer = MistralTokenizer.from_file(f"{models_path}/tokenizer.model.v3")
    mistral_model = Transformer.from_folder(models_path)
    return mistral_tokenizer, mistral_model


tokenizer, model = _load_mistral(mistral_models_path)
 
 
 
 
 
 
 
 
28
 
29
  # Configuraci贸n del modelo de clasificaci贸n
30
@st.cache_resource
def load_classification_model():
    """Load and cache the Spanish legal-document classifier.

    Returns:
        tuple: (model, tokenizer) for the
        ``mrm8488/legal-longformer-base-8192-spanish`` checkpoint.
    """
    repo_id = "mrm8488/legal-longformer-base-8192-spanish"
    cls_tokenizer = AutoTokenizer.from_pretrained(repo_id)
    cls_model = AutoModelForSequenceClassification.from_pretrained(repo_id)
    return cls_model, cls_tokenizer


classification_model, classification_tokenizer = load_classification_model()
37
 
 
61
  return vector_store
62
 
63
def translate(text, target_language):
    """Translate *text* into *target_language* with the local Mistral model.

    Uses the module-level ``tokenizer`` and ``model``. Generation is
    deterministic (temperature 0.0) and capped at 512 new tokens, so very
    long documents may be truncated.
    """
    prompt = f"Por favor, traduzca el siguiente documento al {target_language}:\n{text}\nAseg煤rese de que la traducci贸n sea precisa y conserve el significado original del documento."
    request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
    prompt_tokens = tokenizer.encode_chat_completion(request).tokens
    generated, _ = generate(
        [prompt_tokens],
        model,
        max_tokens=512,
        temperature=0.0,
        eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
    )
    translated_text = tokenizer.instruct_tokenizer.tokenizer.decode(generated[0])
    return translated_text
71
 
72
def summarize(text, length):
    """Summarize *text* with the local Mistral model.

    Args:
        text: document to summarize.
        length: Spanish phrase describing the desired summary size,
            interpolated directly into the prompt.

    Deterministic generation (temperature 0.0), capped at 512 new tokens.
    """
    prompt = f"Por favor, haga un resumen {length} del siguiente documento:\n{text}\nAseg煤rese de que el resumen sea conciso y conserve el significado original del documento."
    request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
    prompt_tokens = tokenizer.encode_chat_completion(request).tokens
    generated, _ = generate(
        [prompt_tokens],
        model,
        max_tokens=512,
        temperature=0.0,
        eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
    )
    summarized_text = tokenizer.instruct_tokenizer.tokenizer.decode(generated[0])
    return summarized_text
80
 
81
  def handle_uploaded_file(uploaded_file):
 
115
  st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
116
  st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
117
 
 
 
 
118
  user_input = st.text_input("Introduce tu consulta:", "")
119
 
120
  if user_input:
 
138
  vector_store = create_vector_store(docs)
139
  search_docs = vector_store.similarity_search(user_input)
140
  context = " ".join([doc.page_content for doc in search_docs])
141
+ completion_request = ChatCompletionRequest(
142
+ messages=[UserMessage(content=f"Contexto: {context}\n\nPregunta: {user_input}")]
143
+ )
144
+ tokens = tokenizer.encode_chat_completion(completion_request).tokens
145
+ out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
146
+ bot_response = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
147
  elif operation == "Resumir":
148
  if summary_length == "corto":
149
  length = "de aproximadamente 50 palabras"
 
155
  elif operation == "Traducir":
156
  bot_response = translate(user_input, target_language)
157
  else:
158
+ completion_request = ChatCompletionRequest(
159
+ messages=[UserMessage(content=user_input)]
160
+ )
161
+ tokens = tokenizer.encode_chat_completion(completion_request).tokens
162
+ out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
163
+ bot_response = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
164
 
165
  st.session_state.messages.append({"role": "assistant", "content": bot_response})
166
  st.write(f"**Assistant:** {bot_response}")