AA_T5

Sleeping

App Files Files Community

ahm14 committed on Feb 17

Commit

34d7c10

verified ·

1 Parent(s): 773ca30

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -81

app.py CHANGED Viewed

@@ -1,15 +1,27 @@
 import streamlit as st
 import re
-from langdetect import detect
-from transformers import pipeline
 import nltk
 from docx import Document
 import io
 # Download required NLTK resources
-nltk.download('punkt')
-# Tone categories
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -17,14 +29,13 @@ tone_categories = {
     "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
     "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
     "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
-    "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
     "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
     "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
     "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
-# Frame categories
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
     "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
@@ -47,47 +58,56 @@ frame_categories = {
 def detect_language(text):
     try:
         return detect(text)
-    except:
         return "unknown"
-# Extract tone
 def extract_tone(text):
-    detected_tones = []
     for category, keywords in tone_categories.items():
-        if any(keyword in text.lower() for keyword in keywords):
-            detected_tones.append(category)
-    return detected_tones if detected_tones else ["Neutral"]
-# Categorize frames based on importance
-def categorize_frame_importance(text, keywords):
-    keyword_count = sum(text.lower().count(keyword) for keyword in keywords)
-    if keyword_count > 2:
-        return "Major Focus"
-    elif keyword_count == 1 or keyword_count == 2:
-        return "Significant Focus"
-    else:
-        return "Minor Mention"
-# Extract frames with categorization
-def extract_frames(text):
-    detected_frames = {}
-    for category, keywords in frame_categories.items():
-        importance = categorize_frame_importance(text, keywords)
-        if importance != "Minor Mention":
-            detected_frames[category] = importance
-    if not detected_frames:
-        frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
-        for label in model_result["labels"][:2]:  # Top 2 frames
-            detected_frames[label] = "Significant Focus"
-    return detected_frames
 # Extract hashtags
 def extract_hashtags(text):
     return re.findall(r"#\w+", text)
 # Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
@@ -100,35 +120,10 @@ def extract_captions_from_docx(docx_file):
             captions[current_post] = []
         elif current_post:
             captions[current_post].append(text)
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
-# Generate a DOCX file
-def generate_docx(output_data):
-    doc = Document()
-    doc.add_heading('Activism Message Analysis', 0)
-    for index, (caption, result) in enumerate(output_data.items(), start=1):
-        doc.add_heading(f"{index}. {caption}", level=1)
-        doc.add_paragraph("Full Caption:")
-        doc.add_paragraph(result['Full Caption'], style="Quote")
-        doc.add_paragraph(f"Language: {result['Language']}")
-        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
-        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
-        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
-        doc.add_heading('Frames:', level=2)
-        for frame, importance in result['Frames'].items():
-            doc.add_paragraph(f"{frame}: {importance}")
-    doc_io = io.BytesIO()
-    doc.save(doc_io)
-    doc_io.seek(0)
-    return doc_io
-# Streamlit UI
-st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
 st.write("Enter text or upload a DOCX file for analysis:")
@@ -138,32 +133,31 @@ input_text = st.text_area("Input Text", height=200)
 # File upload
 uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
 output_data = {}
 if input_text:
     output_data["Manual Input"] = {
-        'Full Caption': input_text,
-        'Language': detect_language(input_text),
-        'Tone of Caption': extract_tone(input_text),
-        'Hashtags': extract_hashtags(input_text),
-        'Hashtag Count': len(extract_hashtags(input_text)),
-        'Frames': extract_frames(input_text)
     }
-    st.success("Text analysis completed.")
 if uploaded_file:
     captions = extract_captions_from_docx(uploaded_file)
     for caption, text in captions.items():
         output_data[caption] = {
-            'Full Caption': text,
-            'Language': detect_language(text),
-            'Tone of Caption': extract_tone(text),
-            'Hashtags': extract_hashtags(text),
-            'Hashtag Count': len(extract_hashtags(text)),
-            'Frames': extract_frames(text)
         }
-    st.success("DOCX file analysis completed.")
 if output_data:
-    docx_file = generate_docx(output_data)
-    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")

 import streamlit as st
 import re
+import logging
 import nltk
 from docx import Document
 import io
+from langdetect import detect
+from transformers import pipeline
+from groq import ChatGroq
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Initialize logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+# Initialize LLM (Groq API)
+llm = ChatGroq(temperature=0.5, groq_api_key="GROQ_API_KEY", model_name="llama3-8b-8192")
 # Download required NLTK resources
+nltk.download("punkt")
+# Tone categories for fallback method
 # Maps each tone label to the lowercase keywords/phrases that signal it.
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
     "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
     "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
     "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
     "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
     "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
     "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
+# Frame categories for fallback method
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
     "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
 def detect_language(text):
     """Return whatever ``detect`` reports for ``text``.

     Any exception raised by ``detect`` is logged at error level and the
     string "unknown" is returned in its place.
     """
     try:
         language = detect(text)
     except Exception as e:
         logging.error(f"Error detecting language: {e}")
         language = "unknown"
     return language
+# Extract tone using Groq API (or fallback method)
 def extract_tone(text):
     """Return a list of tone labels for ``text``.

     The module-level ``llm`` client is asked for descriptive tone labels and
     its reply is split on commas and line breaks, with surrounding whitespace
     removed from each label. If the request raises, or the reply contains no
     usable label, the keyword-based ``extract_tone_fallback`` result is
     returned instead.
     """
     try:
         # NOTE(review): the ``llm.chat`` call and the dict-style response
         # access are kept as found -- confirm they match the client that is
         # actually bound to ``llm``.
         response = llm.chat([
             {"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
             {"role": "user", "content": text}
         ])
         reply = response["choices"][0]["message"]["content"]
         # Accept "a, b", "a,b" and one-label-per-line replies alike.
         labels = [label.strip() for label in re.split(r"[,\n]+", reply) if label.strip()]
     except Exception as e:
         logging.error(f"Groq API error: {e}")
         labels = []
     # An empty or whitespace-only reply carries no labels; use the keywords.
     return labels if labels else extract_tone_fallback(text)
+# Fallback method for tone extraction
+def extract_tone_fallback(text):
+    detected_tones = set()
+    text_lower = text.lower()
     for category, keywords in tone_categories.items():
+        if any(word in text_lower for word in keywords):
+            detected_tones.add(category)
+    return list(detected_tones) if detected_tones else ["Neutral"]
 # Extract hashtags
 def extract_hashtags(text):
     """Return every ``#`` followed by one or more word characters in ``text``.

     Matches are returned as a list of strings (leading ``#`` included) in
     the order they appear; the list is empty when there are none.
     """
     hashtag_pattern = re.compile(r"#\w+")
     return hashtag_pattern.findall(text)
+# Extract frames using Groq API (or fallback)
+def extract_frames(text):
+    try:
+        response = llm.chat([
+            {"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
+            {"role": "user", "content": text}
+        ])
+        return response["choices"][0]["message"]["content"]
+    except Exception as e:
+        logging.error(f"Groq API error: {e}")
+        return extract_frames_fallback(text)
+# Fallback method for frame extraction
+def extract_frames_fallback(text):
+    detected_frames = set()
+    text_lower = text.lower()
+    for category, keywords in frame_categories.items():
+        if any(word in text_lower for word in keywords):
+            detected_frames.add(category)
+    return list(detected_frames)
 # Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
             captions[current_post] = []
         elif current_post:
             captions[current_post].append(text)
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
+# Streamlit app
+st.title("AI-Powered Activism Message Analyzer")
 st.write("Enter text or upload a DOCX file for analysis:")
 # File upload
 uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
+# Initialize output dictionary
 def _build_report(caption_text):
     """Run every analysis step on one caption and collect the results."""
     return {
         "Full Caption": caption_text,
         "Language": detect_language(caption_text),
         "Tone": extract_tone(caption_text),
         "Hashtags": extract_hashtags(caption_text),
         "Frames": extract_frames(caption_text),
     }


 # Results keyed by "Manual Input" or by the caption key taken from the DOCX
 output_data = {}

 # Text typed into the text area
 if input_text:
     output_data["Manual Input"] = _build_report(input_text)
     st.success("Analysis completed for text input.")

 # Captions read from the uploaded DOCX, one report per caption
 if uploaded_file:
     captions = extract_captions_from_docx(uploaded_file)
     for caption, text in captions.items():
         output_data[caption] = _build_report(text)
     st.success(f"Analysis completed for {len(captions)} posts.")

 # Display results only when at least one analysis was produced
 if output_data:
     st.write(output_data)