Update app.py
app.py CHANGED

@@ -12,11 +12,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import os
 
-
-
-# Realizar el inicio de sesión de Hugging Face solo si el token está disponible
-if huggingface_token:
-    login(token=huggingface_token)
+login(token=os.getenv('HUGGINGFACE_TOKEN'))
 
 # Configuración del modelo LLM
 llm = HuggingFaceEndpoint(
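
The new hunk logs in with a token read from the HUGGINGFACE_TOKEN environment variable. A minimal standalone sketch of that pattern; the guard around a missing variable is added here for illustration only and is not part of the commit:

import os
from huggingface_hub import login

# Read the access token from the environment (the same variable the commit uses).
huggingface_token = os.getenv('HUGGINGFACE_TOKEN')

# Illustrative guard (not in the commit): skip login() when the variable is unset,
# so the script can still start against public models.
if huggingface_token:
    login(token=huggingface_token)
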
@@ -34,14 +30,6 @@ model = AutoModelForSequenceClassification.from_pretrained("mrm8488/legal-longfo
 
 id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}
 
-def read_file(file):
-    file_path = file.name
-    if file_path.endswith('.pdf'):
-        return read_pdf(file_path)
-    else:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            return f.read()
-
 def read_pdf(file_path):
     pdf_reader = PyPDF2.PdfReader(file_path)
     text = ""
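
read_pdf relies on PyPDF2's PdfReader; the page loop itself sits outside the hunks shown. A brief sketch of the same extraction pattern, written as a hypothetical extract_pdf_text helper for clarity:

import PyPDF2

def extract_pdf_text(file_path):
    # Hypothetical helper: iterate over every page and concatenate whatever text
    # PyPDF2 can recover, guarding against pages that return None.
    pdf_reader = PyPDF2.PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
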
@@ -49,20 +37,21 @@ def read_pdf(file_path):
         text += pdf_reader.pages[page].extract_text()
     return text
 
-def summarize(
-
-
-
-
+def summarize(file):
+    # Leer el contenido del archivo subido
+    file_path = file.name
+    if file_path.endswith('.pdf'):
+        text = read_pdf(file_path)
     else:
-
-
-
+        with open(file_path, 'r', encoding='utf-8') as f:
+            text = f.read()
+
+    template = '''
 Por favor, lea detenidamente el siguiente documento:
 <document>
-{
+{TEXT}
 </document>
-Después de leer el documento, identifique los puntos clave y las ideas principales cubiertas en el texto.
+Después de leer el documento, identifique los puntos clave y las ideas principales cubiertas en el texto. Organice estos puntos clave en una lista con viñetas concisa que resuma la información esencial del documento. El resumen debe tener un máximo de 10 puntos.
 Su objetivo es ser exhaustivo en la captura del contenido central del documento, mientras que también es conciso en la expresión de cada punto del resumen. Omita los detalles menores y concéntrese en los temas centrales y hechos importantes.
 '''
 
@@ -74,7 +63,7 @@ Su objetivo es ser exhaustivo en la captura del contenido central del documento,
     formatted_prompt = prompt.format(TEXT=text)
     output_summary = llm_engine_hf.invoke(formatted_prompt)
 
-    return output_summary.content
+    return f"Prompt:\n{formatted_prompt}\n\nResumen:\n{output_summary.content}"
 
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
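
The summarize() hunks format a {TEXT} placeholder via prompt.format(TEXT=text) and then call llm_engine_hf.invoke(...), but neither prompt nor llm_engine_hf is defined inside the hunks shown. A minimal sketch of one plausible wiring, assuming LangChain's PromptTemplate plus a ChatHuggingFace wrapper around the HuggingFaceEndpoint configured earlier; the repo_id and the exact imports are assumptions, not taken from the diff:

from langchain_core.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Assumed setup (not visible in the diff): a chat wrapper whose .invoke() returns
# a message object with a .content attribute, as summarize()/translate() expect.
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # hypothetical model
                          task="text-generation")
llm_engine_hf = ChatHuggingFace(llm=llm)

# Same placeholder convention as the template in summarize().
template = "Por favor, resuma el siguiente documento:\n<document>\n{TEXT}\n</document>"
prompt = PromptTemplate(template=template, input_variables=["TEXT"])

formatted_prompt = prompt.format(TEXT="Texto de ejemplo.")
output_summary = llm_engine_hf.invoke(formatted_prompt)  # returns an AIMessage
print(output_summary.content)
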
@@ -84,9 +73,17 @@ def classify_text(text):
     logits = outputs.logits
     predicted_class_id = logits.argmax(dim=-1).item()
     predicted_label = id2label[predicted_class_id]
-    return
+    return predicted_label
+
+def translate(file, target_language):
+    # Leer el contenido del archivo subido
+    file_path = file.name
+    if file_path.endswith('.pdf'):
+        text = read_pdf(file_path)
+    else:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            text = f.read()
 
-def translate(text, target_language):
     template = '''
 Por favor, traduzca el siguiente documento al {LANGUAGE}:
 <document>
@@ -105,17 +102,35 @@ Asegúrese de que la traducción sea precisa y conserve el significado original
 
     return f"Prompt:\n{formatted_prompt}\n\nTraducción:\n{translated_text.content}"
 
-def process_file(file, action, target_language=None
-    text = read_file(file)
+def process_file(file, action, target_language=None):
     if action == "Resumen":
-        return summarize(
+        return summarize(file)
     elif action == "Clasificar":
+        file_path = file.name
+        if file_path.endswith('.pdf'):
+            text = read_pdf(file_path)
+        else:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
         return classify_text(text)
     elif action == "Traducir":
-        return translate(
+        return translate(file, target_language)
     else:
         return "Acción no válida"
 
+def download_text(output_text, filename='output.txt'):
+    if output_text:
+        file_path = Path(filename)
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(output_text)
+        return file_path
+    else:
+        return None
+
+def create_download_file(output_text, filename='output.txt'):
+    file_path = download_text(output_text, filename)
+    return str(file_path) if file_path else None
+
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
     gr.Markdown("## Procesador de Documentos")
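
process_file() dispatches on the action string and only ever touches file.name on the uploaded object, so it can be exercised outside the Gradio UI with any object exposing a .name path. A small illustrative sketch, assuming app.py's functions are in scope; the SimpleNamespace stand-in and the sample file are hypothetical:

from types import SimpleNamespace

# Hypothetical stand-in for the Gradio upload object: process_file only reads .name.
uploaded = SimpleNamespace(name="contrato_ejemplo.txt")  # assumes this file exists locally

print(process_file(uploaded, "Clasificar"))      # -> one of the id2label labels
print(process_file(uploaded, "Resumen"))         # -> prompt plus generated summary
print(process_file(uploaded, "Traducir", "en"))  # -> prompt plus translation
print(process_file(uploaded, "Otra"))            # -> "Acción no válida"
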
@@ -124,26 +139,34 @@ with gr.Blocks() as demo:
     with gr.Column():
         file = gr.File(label="Subir un archivo")
         action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
-        summary_length = gr.Radio(label="Seleccione la longitud del resumen", choices=["Corto", "Medio", "Largo"], visible=False)
         target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)
 
     with gr.Column():
-        output_text = gr.Textbox(label="Resultado", lines=
+        output_text = gr.Textbox(label="Resultado", lines=20)
 
-    def
+    def update_language_dropdown(action):
         if action == "Traducir":
-            return gr.update(visible=
-        elif action == "Resumen":
-            return gr.update(visible=True), gr.update(visible=False)
-        elif action == "Clasificar":
-            return gr.update(visible=False), gr.update(visible(False))
+            return gr.update(visible=True)
         else:
-            return gr.update(visible=False)
+            return gr.update(visible=False)
 
-    action.change(
+    action.change(update_language_dropdown, inputs=action, outputs=target_language)
 
     submit_button = gr.Button("Procesar")
-    submit_button.click(process_file, inputs=[file, action, target_language
-
+    submit_button.click(process_file, inputs=[file, action, target_language], outputs=output_text)
+
+    def generate_file():
+        summary_text = output_text.value
+        filename = 'translation.txt' if action.value == 'Traducir' else 'summary.txt'
+        file_path = download_text(summary_text, filename)
+        return file_path
+
+    download_button = gr.Button("Descargar Resultado")
+    download_button.click(
+        fn=generate_file,
+        inputs=[],
+        outputs=gr.File()
+    )
+
 # Ejecutar la aplicación Gradio
-demo.launch(share=True)
+demo.launch(share=True)
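
The download wiring returns a file path from generate_file() and binds it to a gr.File output. A self-contained sketch of that pattern; as a variant, the textbox is passed as an explicit input so its current value reaches the callback (all names here are illustrative, not from the commit):

import gradio as gr
from pathlib import Path

def save_result(text):
    # Write the textbox content to disk and return the path for a gr.File output.
    path = Path("resultado.txt")
    path.write_text(text or "", encoding="utf-8")
    return str(path)

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Resultado")
    btn = gr.Button("Descargar Resultado")
    out = gr.File(label="Archivo generado")
    btn.click(fn=save_result, inputs=box, outputs=out)

# sketch.launch()  # launch left commented out in this sketch
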