kambris committed on
Commit
52078cc
·
verified ·
1 Parent(s): 0eea166

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -326,9 +326,10 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
326
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
327
  all_emotions = []
328
 
329
- # Enhanced embedding generation
330
  embeddings = []
331
  for i, text in enumerate(texts):
 
332
  text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
333
  chunk_embeddings = []
334
 
@@ -336,27 +337,20 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
336
  chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
337
  chunk_embeddings.append(chunk_embedding)
338
 
 
339
  full_embedding = np.mean(chunk_embeddings, axis=0)
340
  embeddings.append(full_embedding)
341
 
342
  progress = (i + 1) / len(texts) * 0.4
343
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
344
-
345
  embeddings = np.array(embeddings)
346
 
347
- # Process emotions with proper chunking
348
  for i, text in enumerate(texts):
349
- text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
350
- chunk_emotions = []
351
-
352
- for chunk in text_chunks:
353
- emotion = emotion_classifier(chunk)[0]['label']
354
- chunk_emotions.append(emotion)
355
-
356
- # Use most common emotion for the full text
357
- final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
358
- all_emotions.append(final_emotion)
359
-
360
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
361
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
362
 
@@ -380,7 +374,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
380
 
381
  return summaries, topic_model
382
 
383
-
384
  try:
385
  bert_tokenizer, bert_model, emotion_classifier = load_models()
386
  st.success("Models loaded successfully!")
 
326
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
327
  all_emotions = []
328
 
329
+ # Get embeddings with proper output handling
330
  embeddings = []
331
  for i, text in enumerate(texts):
332
+ # Split text into chunks that respect the 512 token limit
333
  text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
334
  chunk_embeddings = []
335
 
 
337
  chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
338
  chunk_embeddings.append(chunk_embedding)
339
 
340
+ # Combine chunk embeddings to represent the full poem
341
  full_embedding = np.mean(chunk_embeddings, axis=0)
342
  embeddings.append(full_embedding)
343
 
344
  progress = (i + 1) / len(texts) * 0.4
345
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
346
+
347
  embeddings = np.array(embeddings)
348
 
349
+ # Process emotions with tuple output handling
350
  for i, text in enumerate(texts):
351
+ result = emotion_classifier(text)
352
+ emotion = result[0] # Access first element of tuple
353
+ all_emotions.append(emotion)
 
 
 
 
 
 
 
 
354
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
355
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
356
 
 
374
 
375
  return summaries, topic_model
376
 
 
377
  try:
378
  bert_tokenizer, bert_model, emotion_classifier = load_models()
379
  st.success("Models loaded successfully!")