Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -326,9 +326,10 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
326 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
327 |
all_emotions = []
|
328 |
|
329 |
-
#
|
330 |
embeddings = []
|
331 |
for i, text in enumerate(texts):
|
|
|
332 |
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
333 |
chunk_embeddings = []
|
334 |
|
@@ -336,27 +337,20 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
336 |
chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
|
337 |
chunk_embeddings.append(chunk_embedding)
|
338 |
|
|
|
339 |
full_embedding = np.mean(chunk_embeddings, axis=0)
|
340 |
embeddings.append(full_embedding)
|
341 |
|
342 |
progress = (i + 1) / len(texts) * 0.4
|
343 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
344 |
-
|
345 |
embeddings = np.array(embeddings)
|
346 |
|
347 |
-
# Process emotions with
|
348 |
for i, text in enumerate(texts):
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
for chunk in text_chunks:
|
353 |
-
emotion = emotion_classifier(chunk)[0]['label']
|
354 |
-
chunk_emotions.append(emotion)
|
355 |
-
|
356 |
-
# Use most common emotion for the full text
|
357 |
-
final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
|
358 |
-
all_emotions.append(final_emotion)
|
359 |
-
|
360 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
361 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
362 |
|
@@ -380,7 +374,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
380 |
|
381 |
return summaries, topic_model
|
382 |
|
383 |
-
|
384 |
try:
|
385 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
386 |
st.success("Models loaded successfully!")
|
|
|
326 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
327 |
all_emotions = []
|
328 |
|
329 |
+
# Get embeddings with proper output handling
|
330 |
embeddings = []
|
331 |
for i, text in enumerate(texts):
|
332 |
+
# Split text into 512-character slices (assuming text is a str, this counts characters, not tokens, so it only approximates the 512-token limit)
|
333 |
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
334 |
chunk_embeddings = []
|
335 |
|
|
|
337 |
chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
|
338 |
chunk_embeddings.append(chunk_embedding)
|
339 |
|
340 |
+
# Combine chunk embeddings to represent the full poem
|
341 |
full_embedding = np.mean(chunk_embeddings, axis=0)
|
342 |
embeddings.append(full_embedding)
|
343 |
|
344 |
progress = (i + 1) / len(texts) * 0.4
|
345 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
346 |
+
|
347 |
embeddings = np.array(embeddings)
|
348 |
|
349 |
+
# Process emotions with tuple output handling
|
350 |
for i, text in enumerate(texts):
|
351 |
+
result = emotion_classifier(text)
|
352 |
+
emotion = result[0] # Access first element of tuple
|
353 |
+
all_emotions.append(emotion)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
355 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
356 |
|
|
|
374 |
|
375 |
return summaries, topic_model
|
376 |
|
|
|
377 |
try:
|
378 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
379 |
st.success("Models loaded successfully!")
|