kambris committed on
Commit
52078cc
·
verified ·
1 Parent(s): 0eea166

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -326,9 +326,10 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
326
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
327
  all_emotions = []
328
 
329
- # Enhanced embedding generation
330
  embeddings = []
331
  for i, text in enumerate(texts):
 
332
  text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
333
  chunk_embeddings = []
334
 
@@ -336,27 +337,20 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
336
  chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
337
  chunk_embeddings.append(chunk_embedding)
338
 
 
339
  full_embedding = np.mean(chunk_embeddings, axis=0)
340
  embeddings.append(full_embedding)
341
 
342
  progress = (i + 1) / len(texts) * 0.4
343
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
344
-
345
  embeddings = np.array(embeddings)
346
 
347
- # Process emotions with proper chunking
348
  for i, text in enumerate(texts):
349
- text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
350
- chunk_emotions = []
351
-
352
- for chunk in text_chunks:
353
- emotion = emotion_classifier(chunk)[0]['label']
354
- chunk_emotions.append(emotion)
355
-
356
- # Use most common emotion for the full text
357
- final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
358
- all_emotions.append(final_emotion)
359
-
360
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
361
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
362
 
@@ -380,7 +374,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
380
 
381
  return summaries, topic_model
382
 
383
-
384
  try:
385
  bert_tokenizer, bert_model, emotion_classifier = load_models()
386
  st.success("Models loaded successfully!")
 
326
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
327
  all_emotions = []
328
 
329
+ # Get embeddings with proper output handling
330
  embeddings = []
331
  for i, text in enumerate(texts):
332
+ # Split text into chunks that respect the 512 token limit
333
  text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
334
  chunk_embeddings = []
335
 
 
337
  chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
338
  chunk_embeddings.append(chunk_embedding)
339
 
340
+ # Combine chunk embeddings to represent the full poem
341
  full_embedding = np.mean(chunk_embeddings, axis=0)
342
  embeddings.append(full_embedding)
343
 
344
  progress = (i + 1) / len(texts) * 0.4
345
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
346
+
347
  embeddings = np.array(embeddings)
348
 
349
+ # Process emotions with tuple output handling
350
  for i, text in enumerate(texts):
351
+ result = emotion_classifier(text)
352
+ emotion = result[0] # Access first element of tuple
353
+ all_emotions.append(emotion)
 
 
 
 
 
 
 
 
354
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
355
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
356
 
 
374
 
375
  return summaries, topic_model
376
 
 
377
  try:
378
  bert_tokenizer, bert_model, emotion_classifier = load_models()
379
  st.success("Models loaded successfully!")