Spaces:

WALTERMAC
/

DatosPF

Sleeping

App Files Files Community

WALTERMAC committed on Apr 4

Commit

7ec3ded

verified ·

1 Parent(s): 99dd147

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -2

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from nltk.tokenize import word_tokenize
 from collections import Counter
 import plotly.express as px
 import emoji
 # Descargar recursos de nltk
 nltk.download('stopwords')
@@ -25,6 +26,15 @@ sentiment_analysis = pipeline('sentiment-analysis', model='dccuchile/bert-base-s
 # Return True when at least one single character of `texto` is a key of
 # emoji.EMOJI_DATA; the text is checked character by character.
 def contiene_emojis(texto):
     return any(char in emoji.EMOJI_DATA for char in texto)
 # Función para procesar el archivo .txt de WhatsApp
 def cargar_chat_txt(file):
     content = file.getvalue().decode('utf-8')
@@ -68,6 +78,7 @@ def cargar_chat_txt(file):
     if not df.empty:
         df['FechaHora'] = pd.to_datetime(df['FechaHora'])
         return df
     else:
         return None
@@ -88,7 +99,7 @@ def extraer_bigrams_trigrams(mensaje):
 # Funciones de urgencia (autor, hora, sentimiento, palabras clave, etc.)
 # Score a message by its author: 2 when the name contains one of the listed
 # heart emojis or is exactly equal to an entry of autores_prioritarios, else 0.
 def urgencia_por_autor(autor):
-    autores_prioritarios = ["Jefe", "Hijo", "Mamá", "Papá", "Esposa"]
     if any(char in autor for char in ["❤️", "💖", "💘", "💝", "💕"]):
         return 2
     return 2 if autor in autores_prioritarios else 0
@@ -104,7 +115,7 @@ def urgencia_por_sentimiento(sentimiento):
     return etiquetas.get(sentimiento, 0)
 # Return 1 when the lowercased message contains any phrase of `claves` as a
 # substring, otherwise 0.
 def urgencia_por_palabras_clave(mensaje):
-    claves = ["urgente", "es urgente", "es para hoy", "necesito ayuda", "por favor", "con urgencia"]
     mensaje = mensaje.lower()
     return 1 if any(clave in mensaje for clave in claves) else 0

 from collections import Counter
 import plotly.express as px
 import emoji
+import string
 # Descargar recursos de nltk
 nltk.download('stopwords')
 def contiene_emojis(texto):
     """Tell whether ``texto`` holds at least one emoji character.

     Each character is looked up individually in ``emoji.EMOJI_DATA``; the
     scan stops at the first hit.
     """
     for caracter in texto:
         if caracter in emoji.EMOJI_DATA:
             return True
     return False
+# Función para limpiar texto (normalización + limpieza de caracteres especiales)
def limpiar_texto(texto):
    """Normalize a chat message for text analysis.

    Steps, in order: lowercase, drop URLs, drop digit runs, drop punctuation,
    collapse leftover runs of spaces/tabs, and trim both ends.

    Args:
        texto: Raw message text.

    Returns:
        The cleaned string (possibly empty).
    """
    texto = texto.lower()
    texto = re.sub(r'https?://\S+|www\.\S+', '', texto)  # remove URLs
    texto = re.sub(r'\d+', '', texto)  # remove numbers
    # string.punctuation is ASCII-only, so the Spanish opening marks and the
    # typographic quotes/ellipsis have to be listed explicitly.
    signos = string.punctuation + '¿¡«»“”‘’…'
    texto = texto.translate(str.maketrans('', '', signos))
    # Deleting URLs, digits and punctuation leaves doubled spaces behind.
    texto = re.sub(r'[ \t]+', ' ', texto)
    return texto.strip()
 # Función para procesar el archivo .txt de WhatsApp
 def cargar_chat_txt(file):
     content = file.getvalue().decode('utf-8')
     if not df.empty:
         df['FechaHora'] = pd.to_datetime(df['FechaHora'])
+        df['Mensaje'] = df['Mensaje'].apply(limpiar_texto)
         return df
     else:
         return None
 # Funciones de urgencia (autor, hora, sentimiento, palabras clave, etc.)
 def urgencia_por_autor(autor):
     """Score how urgent a message is based on who sent it.

     Args:
         autor: Display name of the message author.

     Returns:
         2 when the name contains a heart emoji or equals one of the priority
         names (ignoring case and surrounding whitespace); otherwise 0.
     """
     autores_prioritarios = {"jefe", "hijo", "mamá", "papá", "esposa", "novia"}
     # "\u2764" is the bare heavy-heart code point. Testing for it also covers
     # the emoji-presentation form (U+2764 followed by U+FE0F), so names typed
     # with either variant are recognised.
     corazones = ("\u2764", "💖", "💘", "💝", "💕")
     if any(corazon in autor for corazon in corazones):
         return 2
     # Contact names are free text: compare without case or padding.
     return 2 if autor.strip().casefold() in autores_prioritarios else 0
     return etiquetas.get(sentimiento, 0)
 def urgencia_por_palabras_clave(mensaje):
     """Return 1 if the message contains any urgency keyword, else 0.

     Matching is by substring and ignores case and accents, so a correctly
     spelled "rápido" matches the unaccented keyword "rapido".

     Args:
         mensaje: Message text.

     Returns:
         1 when any keyword occurs in the message, otherwise 0.
     """
     import unicodedata  # local import keeps the block self-contained

     def _sin_tildes(texto):
         # Decompose accented letters and drop the combining marks (á -> a).
         descompuesto = unicodedata.normalize("NFD", texto.lower())
         return "".join(c for c in descompuesto if not unicodedata.combining(c))

     # NOTE(review): "callo" looks like a misspelling of "cayó" (fell) --
     # confirm with the author before changing the list.
     claves = ["urgente", "es urgente", "es para hoy", "necesito ayuda", "por favor", "con urgencia", "rapido", "callo", "caer", "atropellado"]
     mensaje = _sin_tildes(mensaje)
     return 1 if any(_sin_tildes(clave) in mensaje for clave in claves) else 0