AA_TT2

Sleeping

App Files Files Community

ahm14 committed on Feb 18

Commit

da716d7

verified ·

1 Parent(s): 442e623

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -78

app.py CHANGED Viewed

@@ -33,7 +33,7 @@ llm = ChatGroq(temperature=0.5, groq_api_key=GROQ_API_KEY, model_name="llama3-8b
 # Download required NLTK resources
 nltk.download("punkt")
-# Updated tone categories
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -41,32 +41,92 @@ tone_categories = {
     "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
     "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
     "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
-    "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
     "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
     "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
     "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
-# Updated frame categories
 frame_categories = {
-    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
-    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
-    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
-    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
-    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
-    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
-    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
-    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
-    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
-    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
-    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
-    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
-    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
-    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
-    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
 }
 # Detect language
 def detect_language(text):
     try:
@@ -98,10 +158,6 @@ def extract_tone_fallback(text):
 def extract_hashtags(text):
     return re.findall(r"#\w+", text)
-# Extract hashtags
-def extract_hashtags(text):
-    return re.findall(r"#\w+", text)
 # Categorize frames into Major, Significant, and Minor based on frequency
 def categorize_frames(frame_list):
     frame_counter = Counter(frame_list)
@@ -110,9 +166,9 @@ def categorize_frames(frame_list):
     sorted_frames = sorted(frame_counter.items(), key=lambda x: x[1], reverse=True)
     for i, (frame, count) in enumerate(sorted_frames):
-        if i == 0:
             categorized_frames["Major Focus"].append(frame)
-        elif i < 3:
             categorized_frames["Significant Focus"].append(frame)
         else:
             categorized_frames["Minor Mention"].append(frame)
@@ -120,27 +176,22 @@ def categorize_frames(frame_list):
     return categorized_frames
 # Extract frames using keyword matching and categorize
-def extract_frames_fallback(text):
     detected_frames = []
     text_lower = text.lower()
-    for category, keywords in frame_categories.items():
-        keyword_count = sum(1 for word in keywords if word in text_lower)
-        if keyword_count > 0:
-            detected_frames.append(category)
     return categorize_frames(detected_frames)
-# Extract metadata from Excel file
-def extract_metadata_from_excel(excel_file):
-    try:
-        df = pd.read_excel(excel_file)
-        extracted_data = df.to_dict(orient="records")
-        return extracted_data
-    except Exception as e:
-        logging.error(f"Error processing Excel file: {e}")
-        return []
 # Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
@@ -155,51 +206,49 @@ def extract_captions_from_docx(docx_file):
             captions[current_post].append(text)
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
-# Merge metadata and captions together
-def merge_metadata_with_captions(metadata, captions):
-    merged_data = []
-    for i, meta in enumerate(metadata):
-        post_number = f"Post {i+1}"
-        caption_text = captions.get(post_number, "No caption available")
-        post_data = meta.copy()
-        post_data["Full Caption"] = caption_text
-        post_data["Language"] = detect_language(caption_text)
-        post_data["Tone"] = extract_tone(caption_text)
-        post_data["Hashtags"] = extract_hashtags(caption_text)
-        post_data["Frames"] = extract_frames_fallback(caption_text)
-        merged_data.append(post_data)
-    return merged_data
-# Create DOCX file with correct formatting
 def create_docx_from_data(extracted_data):
     doc = Document()
-    for index, data in enumerate(extracted_data, start=1):
-        doc.add_heading(f"Sr No {index}:", level=1)
-        metadata_fields = [
-            "Date of Post", "Media Type", "Number of Pictures", "Number of Videos",
-            "Number of Audios", "Likes", "Comments", "Tagged Audience"
         ]
-        for field in metadata_fields:
-            value = data.get(field, "N/A")
-            doc.add_paragraph(f"**{field}:** {value}")
-        doc.add_paragraph(f"**Caption:** {data.get('Full Caption', 'N/A')}")
-        doc.add_paragraph(f"**Language:** {data.get('Language', 'N/A')}")
-        doc.add_paragraph(f"**Tone:** {', '.join(data.get('Tone', ['N/A']))}")
-        doc.add_paragraph(f"**Hashtags:** {', '.join(data.get('Hashtags', []))}")
-        frames = data.get("Frames", {})
-        doc.add_paragraph("**Frames:**")
-        for category, frame_list in frames.items():
-            if frame_list:
-                doc.add_paragraph(f"  {category}: {', '.join(frame_list)}")
         doc.add_paragraph("\n")
@@ -208,17 +257,49 @@ def create_docx_from_data(extracted_data):
 # Streamlit app
 st.title("AI-Powered Activism Message Analyzer")
 uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
 uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
-if uploaded_excel and uploaded_docx:
     excel_metadata = extract_metadata_from_excel(uploaded_excel)
-    docx_captions = extract_captions_from_docx(uploaded_docx)
-    merged_data = merge_metadata_with_captions(excel_metadata, docx_captions)
-    docx_output = create_docx_from_data(merged_data)
     docx_io = io.BytesIO()
     docx_output.save(docx_io)
     docx_io.seek(0)
     st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")

 # Download required NLTK resources
 nltk.download("punkt")
+# Tone categories for fallback method
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
     "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
     "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
     "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
     "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
     "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
     "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
+# Frame categories for fallback method
 frame_categories = {
+    "Human Rights & Justice": {
+        "Legal Rights & Reforms": ["law", "justice", "legal", "reforms", "legislation"],
+        "Humanitarian Issues": ["humanitarian", "aid", "refugees", "asylum", "crisis response"],
+        "Civil Liberties": ["freedom", "expression", "privacy", "rights violations"]
+    },
+    "Political & State Accountability": {
+        "Corruption & Governance": ["corruption", "government", "policy", "accountability", "transparency"],
+        "Political Oppression": ["authoritarianism", "censorship", "state control", "dissent", "crackdown"],
+        "Elections & Political Representation": ["voting", "elections", "political participation", "democracy"]
+    },
+    "Gender & Patriarchy": {
+        "Gender-Based Violence": ["violence", "domestic abuse", "sexual harassment", "femicide"],
+        "Women's Rights & Equality": ["gender equality", "feminism", "reproductive rights", "patriarchy"],
+        "LGBTQ+ Rights": ["queer rights", "LGBTQ+", "gender identity", "trans rights", "homophobia"]
+    },
+    "Religious Freedom & Persecution": {
+        "Religious Discrimination": ["persecution", "intolerance", "sectarianism", "faith-based violence"],
+        "Religious Minorities' Rights": ["minorities", "blasphemy laws", "religious freedom", "forced conversion"]
+    },
+    "Grassroots Mobilization": {
+        "Community Activism": ["activism", "grassroots", "volunteering", "local organizing"],
+        "Protests & Demonstrations": ["march", "strike", "rally", "sit-in", "boycott"],
+        "Coalition Building": ["solidarity", "collaboration", "alliances", "mutual aid"]
+    },
+    "Environmental Crisis & Activism": {
+        "Climate Change Awareness": ["climate crisis", "global warming", "carbon emissions", "fossil fuels"],
+        "Conservation & Sustainability": ["deforestation", "wildlife protection", "biodiversity"],
+        "Environmental Justice": ["pollution", "water crisis", "land rights", "indigenous rights"]
+    },
+    "Anti-Extremism & Anti-Violence": {
+        "Hate Speech & Radicalization": ["hate speech", "extremism", "online radicalization", "propaganda"],
+        "Mob & Sectarian Violence": ["mob attack", "lynching", "sectarian violence", "hate crimes"],
+        "Counterterrorism & De-Radicalization": ["terrorism", "prevention", "peacebuilding", "rehabilitation"]
+    },
+    "Social Inequality & Economic Disparities": {
+        "Class Privilege & Labor Rights": ["classism", "labor rights", "unions", "wage gap"],
+        "Poverty & Economic Justice": ["poverty", "inequality", "economic disparity", "wealth gap"],
+        "Housing & Healthcare": ["housing crisis", "healthcare access", "social safety nets"]
+    },
+    "Activism & Advocacy": {
+        "Policy Advocacy & Legal Reforms": ["campaign", "policy change", "legal advocacy"],
+        "Social Media Activism": ["hashtags", "digital activism", "awareness campaign"],
+        "Freedom of Expression & Press": ["press freedom", "censorship", "media rights"]
+    },
+    "Systemic Oppression": {
+        "Marginalized Communities": ["minorities", "exclusion", "systemic discrimination"],
+        "Racial & Ethnic Discrimination": ["racism", "xenophobia", "ethnic cleansing", "casteism"],
+        "Institutional Bias": ["institutional racism", "structural oppression", "biased laws"]
+    },
+    "Intersectionality": {
+        "Multiple Oppressions": ["overlapping struggles", "intersecting identities", "double discrimination"],
+        "Women & Marginalized Identities": ["feminism", "queer feminism", "minority women"],
+        "Global Solidarity Movements": ["transnational activism", "cross-movement solidarity"]
+    },
+    "Call to Action": {
+        "Petitions & Direct Action": ["sign petition", "protest", "boycott"],
+        "Fundraising & Support": ["donate", "crowdfunding", "aid support"],
+        "Policy & Legislative Action": ["policy change", "demand action", "write to lawmakers"]
+    },
+    "Empowerment & Resistance": {
+        "Grassroots Organizing": ["community empowerment", "leadership training"],
+        "Revolutionary Movements": ["resistance", "revolt", "revolutionary change"],
+        "Inspiration & Motivational Messaging": ["hope", "courage", "overcoming struggles"]
+    },
+    "Climate Justice": {
+        "Indigenous Environmental Activism": ["land rights", "indigenous climate leadership"],
+        "Corporate Accountability": ["big oil", "corporate greed", "environmental negligence"],
+        "Sustainable Development": ["eco-friendly", "renewable energy", "circular economy"]
+    },
+    "Human Rights Advocacy": {
+        "Criminal Justice Reform": ["police brutality", "wrongful convictions", "prison reform"],
+        "Workplace Discrimination & Labor Rights": ["workplace bias", "equal pay", "unions"],
+        "International Human Rights": ["humanitarian law", "UN declarations", "international treaties"]
+    }
 }
 # Detect language
 def detect_language(text):
     try:
 def extract_hashtags(text):
     return re.findall(r"#\w+", text)
 # Categorize frames into Major, Significant, and Minor based on frequency
 def categorize_frames(frame_list):
     frame_counter = Counter(frame_list)
     sorted_frames = sorted(frame_counter.items(), key=lambda x: x[1], reverse=True)
     for i, (frame, count) in enumerate(sorted_frames):
+        if i == 0:  # Highest frequency frame
             categorized_frames["Major Focus"].append(frame)
+        elif i < 3:  # Top 3 most mentioned frames
             categorized_frames["Significant Focus"].append(frame)
         else:
             categorized_frames["Minor Mention"].append(frame)
     return categorized_frames
 # Extract frames using keyword matching and categorize
def extract_frames_fallback(text, frame_categories=None):
    """Detect activism frames in ``text`` by keyword matching.

    Args:
        text: Caption or free text to scan. ``None`` or an empty string
            yields no detected frames.
        frame_categories: Mapping of main category -> {subcategory -> list
            of keywords}. When omitted, the module-level ``frame_categories``
            table is used, so one-argument calls such as
            ``extract_frames_fallback(caption)`` work.

    Returns:
        The result of ``categorize_frames`` applied to the list of detected
        ``(main_category, subcategory)`` tuples.
    """
    if frame_categories is None:
        # The parameter shadows the module-level table of the same name, so
        # the global has to be fetched explicitly.
        frame_categories = globals()["frame_categories"]

    text_lower = (text or "").lower()
    detected_frames = []
    for main_category, subcategories in frame_categories.items():
        for subcategory, keywords in subcategories.items():
            # Keywords are lower-cased as well: entries such as "LGBTQ+" or
            # "UN declarations" could otherwise never match the lower-cased
            # text.
            if any(keyword.lower() in text_lower for keyword in keywords):
                detected_frames.append((main_category, subcategory))

    return categorize_frames(detected_frames)
 # Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
             captions[current_post].append(text)
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
+# Extract metadata from Excel file
def extract_metadata_from_excel(excel_file):
    """Read an Excel workbook and return its rows as a list of dicts.

    Args:
        excel_file: Path or file-like object passed straight to
            ``pandas.read_excel``.

    Returns:
        One dict per row keyed by column header, or an empty list when the
        workbook cannot be read or converted. The failure is logged rather
        than raised so the app keeps running.
    """
    try:
        df = pd.read_excel(excel_file)
        return df.to_dict(orient="records")
    except Exception as exc:
        # Deliberate best-effort boundary. logging.exception keeps the
        # traceback, which a plain logging.error call would drop.
        logging.exception("Error processing Excel file: %s", exc)
        return []
+# Merge metadata with generated analysis
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    """Merge Excel metadata rows into the per-post analysis results.

    Each row is matched to the entry keyed ``"Post <n>"``. ``<n>`` comes from
    the row's ``"Post Number"`` value; when that value is absent, ``None`` or
    NaN, the row's 1-based position in ``excel_metadata`` is used instead, so
    rows line up with posts in order.

    Args:
        generated_data: Mapping of post label -> dict of analysis fields.
            It is updated in place.
        excel_metadata: Iterable of dicts, one per metadata row.

    Returns:
        The same ``generated_data`` mapping, with metadata merged in. Rows
        that match no existing post are added as new entries.
    """
    for row_index, post_data in enumerate(excel_metadata, start=1):
        raw_number = post_data.get("Post Number")
        is_missing = raw_number is None or (
            # NaN is the only float that is not equal to itself.
            isinstance(raw_number, float) and raw_number != raw_number
        )
        if is_missing:
            raw_number = row_index
        elif isinstance(raw_number, float) and raw_number.is_integer():
            # A float such as 1.0 must map to "Post 1", not "Post 1.0".
            raw_number = int(raw_number)

        post_number = f"Post {raw_number}"
        # setdefault + update copies the row into a fresh dict for new posts
        # instead of storing the caller's row object by reference.
        generated_data.setdefault(post_number, {}).update(post_data)
    return generated_data
def _frame_label(frame):
    """Return a printable label for a frame given as a string or a tuple."""
    if isinstance(frame, (tuple, list)):
        return " > ".join(str(part) for part in frame)
    return str(frame)


# Create DOCX file matching the uploaded format
def create_docx_from_data(extracted_data):
    """Build a DOCX report with one section per analysed post.

    Args:
        extracted_data: Mapping of post label (used as the section heading)
            -> dict of fields. Missing fields are written as ``"N/A"``.

    Returns:
        The populated ``Document``; the caller is responsible for saving it.
    """
    ordered_keys = [
        "Post Number", "Date of Post", "Media Type", "Number of Pictures",
        "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
        "Full Caption", "Language", "Tone", "Hashtags", "Frames"
    ]
    doc = Document()
    for post_number, data in extracted_data.items():
        doc.add_heading(post_number, level=1)
        for key in ordered_keys:
            value = data.get(key, "N/A")
            if key in ("Tone", "Hashtags"):
                if isinstance(value, list):
                    value = ", ".join(str(item) for item in value)
            elif key == "Frames" and isinstance(value, dict):
                frame_lines = []
                for category, frames in value.items():
                    if not frames:
                        continue
                    # Frames may be (main category, subcategory) tuples, which
                    # str.join rejects, so each one is turned into a label.
                    labels = ", ".join(_frame_label(frame) for frame in frames)
                    frame_lines.append(f"  {category}: {labels}")
                frame_text = "\n".join(frame_lines)
                value = f"\n{frame_text}" if frame_text else "N/A"
            doc.add_paragraph(f"**{key}:** {value}")
        doc.add_paragraph("\n")
    # The caller calls .save() on the result, so the document must be returned.
    return doc
 # Streamlit app
 st.title("AI-Powered Activism Message Analyzer")
+st.write("Enter text or upload a DOCX/Excel file for analysis:")
+input_text = st.text_area("Input Text", height=200)
 uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
 uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
output_data = {}


def _analyze_text(text):
    """Run every analysis step on ``text`` and return the result fields."""
    return {
        "Full Caption": text,
        "Language": detect_language(text),
        "Tone": extract_tone(text),
        "Hashtags": extract_hashtags(text),
        # extract_frames_fallback takes the keyword table as its second
        # argument; calling it with the text alone raises TypeError.
        "Frames": extract_frames_fallback(text, frame_categories),
    }


if input_text:
    output_data["Manual Input"] = _analyze_text(input_text)

if uploaded_docx:
    captions = extract_captions_from_docx(uploaded_docx)
    for caption, text in captions.items():
        output_data[caption] = _analyze_text(text)

if uploaded_excel:
    excel_metadata = extract_metadata_from_excel(uploaded_excel)
    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)

if output_data:
    # Display results in collapsible sections for better UI
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
                st.write(f"**{key}:** {value}")

    # Offer the same results as a downloadable DOCX built in memory.
    docx_output = create_docx_from_data(output_data)
    docx_io = io.BytesIO()
    docx_output.save(docx_io)
    docx_io.seek(0)
    st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")