Update app.py
app.py CHANGED
@@ -9,7 +9,7 @@ import io
 # Download required NLTK resources
 nltk.download('punkt')
 
-#
+# Tone categories
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -24,7 +24,7 @@ tone_categories = {
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
 
-#
+# Frame categories
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
     "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
@@ -47,43 +47,48 @@ frame_categories = {
 def detect_language(text):
     try:
         return detect(text)
-    except Exception as e:
-        st.write(f"Error detecting language: {e}")
+    except:
         return "unknown"
 
-#
-def analyze_tone(text):
-    detected_tones =
+# Extract tone
+def extract_tone(text):
+    detected_tones = []
     for category, keywords in tone_categories.items():
-        if any(
-            detected_tones.
-
-# Extract frames
+        if any(keyword in text.lower() for keyword in keywords):
+            detected_tones.append(category)
+    return detected_tones if detected_tones else ["Neutral"]
+
+# Categorize frames based on importance
+def categorize_frame_importance(text, keywords):
+    keyword_count = sum(text.lower().count(keyword) for keyword in keywords)
+    if keyword_count > 2:
+        return "Major Focus"
+    elif keyword_count == 1 or keyword_count == 2:
+        return "Significant Focus"
+    else:
+        return "Minor Mention"
+
+# Extract frames with categorization
 def extract_frames(text):
-    detected_frames =
+    detected_frames = {}
     for category, keywords in frame_categories.items():
+        importance = categorize_frame_importance(text, keywords)
+        if importance != "Minor Mention":
+            detected_frames[category] = importance
 
     if not detected_frames:
         frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
+        for label in model_result["labels"][:2]:  # Top 2 frames
+            detected_frames[label] = "Significant Focus"
 
-    return
+    return detected_frames
+
+# Extract hashtags
+def extract_hashtags(text):
+    return re.findall(r"#\w+", text)
 
-# Extract captions from DOCX
+# Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
     captions = {}
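The new helpers are plain keyword matching over the dictionaries defined above, so their behavior is easy to spot-check in isolation. A minimal sketch, assuming app.py's definitions are in scope; the sample strings and expected results are illustrative, not part of the commit:

sample = "Urgent: the crisis deepens, but there is faith in a better future."

# extract_tone does case-insensitive substring matching per category, so the
# result should include at least "Emotional" ("urgent", "crisis") and
# "Hopeful" ("better future"); other categories may match as well.
print(extract_tone(sample))

# categorize_frame_importance counts substring occurrences across a category's
# keyword list: three or more hits -> "Major Focus", one or two ->
# "Significant Focus", zero -> "Minor Mention".
print(categorize_frame_importance("rights, rights, and rights again", ["rights"]))  # Major Focus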
@@ -98,7 +103,7 @@ def extract_captions_from_docx(docx_file):
 
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
 
-# Generate a DOCX file
+# Generate a DOCX file
 def generate_docx(output_data):
     doc = Document()
     doc.add_heading('Activism Message Analysis', 0)
@@ -114,87 +119,51 @@ def generate_docx(output_data):
         doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
 
         doc.add_heading('Frames:', level=2)
-        for frame in result['Frames']:
-            doc.add_paragraph(frame)
+        for frame, importance in result['Frames'].items():
+            doc.add_paragraph(f"{frame}: {importance}")
 
     doc_io = io.BytesIO()
     doc.save(doc_io)
     doc_io.seek(0)
-
     return doc_io
 
-# Streamlit
-st.title('AI-Powered Activism Message Analyzer with
+# Streamlit UI
+st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
 
-st.write("Enter
+st.write("Enter text or upload a DOCX file for analysis:")
 
-# Text
+# Text input
 input_text = st.text_area("Input Text", height=200)
 
-# File
+# File upload
 uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
 
-# Initialize output dictionary
 output_data = {}
 
 if input_text:
-    language = detect_language(input_text)
-    tone = analyze_tone(input_text)
-    hashtags = extract_hashtags(input_text)
-    frames = extract_frames(input_text)
-
     output_data["Manual Input"] = {
         'Full Caption': input_text,
-        'Language': language,
-        'Tone of Caption': tone,
-        'Hashtags': hashtags,
-        'Hashtag Count': len(hashtags),
-        'Frames': frames
+        'Language': detect_language(input_text),
+        'Tone of Caption': extract_tone(input_text),
+        'Hashtags': extract_hashtags(input_text),
+        'Hashtag Count': len(extract_hashtags(input_text)),
+        'Frames': extract_frames(input_text)
     }
-
-    st.success("Analysis completed for text input.")
+    st.success("Text analysis completed.")
 
 if uploaded_file:
     captions = extract_captions_from_docx(uploaded_file)
     for caption, text in captions.items():
-        language = detect_language(text)
-        tone = analyze_tone(text)
-        hashtags = extract_hashtags(text)
-        frames = extract_frames(text)
-
        output_data[caption] = {
             'Full Caption': text,
-            'Language': language,
-            'Tone of Caption': tone,
-            'Hashtags': hashtags,
-            'Hashtag Count': len(hashtags),
-            'Frames': frames
+            'Language': detect_language(text),
+            'Tone of Caption': extract_tone(text),
+            'Hashtags': extract_hashtags(text),
+            'Hashtag Count': len(extract_hashtags(text)),
+            'Frames': extract_frames(text)
         }
+    st.success("DOCX file analysis completed.")
 
-    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
-
-# Display results
 if output_data:
-    with st.expander("Generated Output"):
-        st.subheader("Analysis Results")
-        for index, (caption, result) in enumerate(output_data.items(), start=1):
-            st.write(f"### {index}. {caption}")
-            st.write("**Full Caption:**")
-            st.write(f"> {result['Full Caption']}")
-            st.write(f"**Language**: {result['Language']}")
-            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
-            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
-            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
-            st.write("**Frames**:")
-            for frame in result['Frames']:
-                st.write(f"- {frame}")
-
     docx_file = generate_docx(output_data)
-
-    if docx_file:
-        st.download_button(
-            label="Download Analysis as DOCX",
-            data=docx_file,
-            file_name="activism_message_analysis.docx",
-            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-        )
+    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")
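One caveat in the new extract_frames: the facebook/bart-large-mnli pipeline is constructed inside the function, so the model is reloaded on every call that falls through to the zero-shot branch. A possible refinement, sketched under the assumption of a recent Streamlit (st.cache_resource is available) and with get_frame_model as a hypothetical helper name:

import streamlit as st
from transformers import pipeline

@st.cache_resource  # construct the zero-shot classifier once per process
def get_frame_model():
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Inside extract_frames, the inline pipeline(...) call would then become:
#     frame_model = get_frame_model()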