ahm14 committed on
Commit
773ca30
·
verified ·
1 Parent(s): 5c3fa48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -85
app.py CHANGED
@@ -9,7 +9,7 @@ import io
9
  # Download required NLTK resources
10
  nltk.download('punkt')
11
 
12
- # Updated tone categories
13
  tone_categories = {
14
  "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
15
  "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -24,7 +24,7 @@ tone_categories = {
24
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
25
  }
26
 
27
- # Updated frame categories
28
  frame_categories = {
29
  "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
30
  "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
@@ -47,43 +47,48 @@ frame_categories = {
47
  def detect_language(text):
48
  try:
49
  return detect(text)
50
- except Exception as e:
51
- st.write(f"Error detecting language: {e}")
52
  return "unknown"
53
 
54
- # Analyze tone based on predefined categories
55
- def analyze_tone(text):
56
- detected_tones = set()
57
  for category, keywords in tone_categories.items():
58
- if any(word in text.lower() for word in keywords):
59
- detected_tones.add(category)
60
-
61
- if not detected_tones:
62
- tone_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
63
- model_result = tone_model(text, candidate_labels=list(tone_categories.keys()))
64
- detected_tones.update(model_result["labels"][:2])
65
-
66
- return list(detected_tones)
67
-
68
- # Extract hashtags
69
- def extract_hashtags(text):
70
- return re.findall(r"#\w+", text)
71
-
72
- # Extract frames based on predefined categories
73
  def extract_frames(text):
74
- detected_frames = set()
75
  for category, keywords in frame_categories.items():
76
- if any(word in text.lower() for word in keywords):
77
- detected_frames.add(category)
 
78
 
79
  if not detected_frames:
80
  frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
81
  model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
82
- detected_frames.update(model_result["labels"][:2])
 
83
 
84
- return list(detected_frames)
 
 
 
 
85
 
86
- # Extract captions from DOCX file based on "Post X"
87
  def extract_captions_from_docx(docx_file):
88
  doc = Document(docx_file)
89
  captions = {}
@@ -98,7 +103,7 @@ def extract_captions_from_docx(docx_file):
98
 
99
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
100
 
101
- # Generate a DOCX file in-memory with full captions
102
  def generate_docx(output_data):
103
  doc = Document()
104
  doc.add_heading('Activism Message Analysis', 0)
@@ -114,87 +119,51 @@ def generate_docx(output_data):
114
  doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
115
 
116
  doc.add_heading('Frames:', level=2)
117
- for frame in result['Frames']:
118
- doc.add_paragraph(frame)
119
 
120
  doc_io = io.BytesIO()
121
  doc.save(doc_io)
122
  doc_io.seek(0)
123
-
124
  return doc_io
125
 
126
- # Streamlit app
127
- st.title('AI-Powered Activism Message Analyzer with Intersectionality')
128
 
129
- st.write("Enter the text to analyze or upload a DOCX file containing captions:")
130
 
131
- # Text Input
132
  input_text = st.text_area("Input Text", height=200)
133
 
134
- # File Upload
135
  uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
136
 
137
- # Initialize output dictionary
138
  output_data = {}
139
 
140
  if input_text:
141
- language = detect_language(input_text)
142
- tone = analyze_tone(input_text)
143
- hashtags = extract_hashtags(input_text)
144
- frames = extract_frames(input_text)
145
-
146
  output_data["Manual Input"] = {
147
  'Full Caption': input_text,
148
- 'Language': language,
149
- 'Tone of Caption': tone,
150
- 'Hashtags': hashtags,
151
- 'Hashtag Count': len(hashtags),
152
- 'Frames': frames
153
  }
154
-
155
- st.success("Analysis completed for text input.")
156
 
157
  if uploaded_file:
158
  captions = extract_captions_from_docx(uploaded_file)
159
  for caption, text in captions.items():
160
- language = detect_language(text)
161
- tone = analyze_tone(text)
162
- hashtags = extract_hashtags(text)
163
- frames = extract_frames(text)
164
-
165
  output_data[caption] = {
166
  'Full Caption': text,
167
- 'Language': language,
168
- 'Tone of Caption': tone,
169
- 'Hashtags': hashtags,
170
- 'Hashtag Count': len(hashtags),
171
- 'Frames': frames
172
  }
 
173
 
174
- st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
175
-
176
- # Display results
177
  if output_data:
178
- with st.expander("Generated Output"):
179
- st.subheader("Analysis Results")
180
- for index, (caption, result) in enumerate(output_data.items(), start=1):
181
- st.write(f"### {index}. {caption}")
182
- st.write("**Full Caption:**")
183
- st.write(f"> {result['Full Caption']}")
184
- st.write(f"**Language**: {result['Language']}")
185
- st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
186
- st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
187
- st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
188
- st.write("**Frames**:")
189
- for frame in result['Frames']:
190
- st.write(f"- {frame}")
191
-
192
  docx_file = generate_docx(output_data)
193
-
194
- if docx_file:
195
- st.download_button(
196
- label="Download Analysis as DOCX",
197
- data=docx_file,
198
- file_name="activism_message_analysis.docx",
199
- mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
200
- )
 
9
  # Download required NLTK resources
10
  nltk.download('punkt')
11
 
12
+ # Tone categories
13
  tone_categories = {
14
  "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
15
  "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
 
24
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
25
  }
26
 
27
+ # Frame categories
28
  frame_categories = {
29
  "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
30
  "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
 
47
def detect_language(text):
    """Return the language code detected for *text*, or "unknown" on failure.

    langdetect raises on inputs it cannot classify (e.g. empty or
    non-linguistic text); any such failure is reported as "unknown".
    """
    try:
        return detect(text)
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        return "unknown"
52
 
53
# Extract tone
def extract_tone(text):
    """Return the tone categories whose keywords appear in *text*.

    Matching is case-insensitive substring search against the module-level
    ``tone_categories`` mapping; returns ["Neutral"] when nothing matches.
    """
    lowered = text.lower()  # hoisted: avoid re-lowering the text for every keyword
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    return detected_tones if detected_tones else ["Neutral"]
60
+
61
# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Classify how prominently *keywords* feature in *text*.

    Counts keyword occurrences case-insensitively (substring counts, so
    occurrences may overlap across keywords) and buckets the total:
      >2 hits  -> "Major Focus"
      1-2 hits -> "Significant Focus"
      0 hits   -> "Minor Mention"
    """
    lowered = text.lower()  # hoisted: original re-lowered the text once per keyword
    keyword_count = sum(lowered.count(keyword) for keyword in keywords)
    if keyword_count > 2:
        return "Major Focus"
    if 1 <= keyword_count <= 2:
        return "Significant Focus"
    return "Minor Mention"
70
+
71
# Extract frames with categorization
def extract_frames(text):
    """Map frame categories found in *text* to an importance label.

    Keyword counting (via categorize_frame_importance) decides importance
    first; when no category clears the "Minor Mention" bar, fall back to
    zero-shot classification and mark the top two predicted frames as
    "Significant Focus".
    """
    detected_frames = {}
    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    if not detected_frames:
        # Build the zero-shot pipeline once and cache it on the function:
        # reloading facebook/bart-large-mnli on every call is very expensive.
        frame_model = getattr(extract_frames, "_frame_model", None)
        if frame_model is None:
            frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
            extract_frames._frame_model = frame_model
        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
        for label in model_result["labels"][:2]:  # Top 2 frames
            detected_frames[label] = "Significant Focus"

    return detected_frames
86
+
87
# Extract hashtags
def extract_hashtags(text):
    """Return every #hashtag token appearing in *text*, in document order."""
    hashtag_pattern = re.compile(r"#\w+")
    return hashtag_pattern.findall(text)
90
 
91
+ # Extract captions from DOCX
92
  def extract_captions_from_docx(docx_file):
93
  doc = Document(docx_file)
94
  captions = {}
 
103
 
104
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
105
 
106
+ # Generate a DOCX file
107
  def generate_docx(output_data):
108
  doc = Document()
109
  doc.add_heading('Activism Message Analysis', 0)
 
119
  doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
120
 
121
  doc.add_heading('Frames:', level=2)
122
+ for frame, importance in result['Frames'].items():
123
+ doc.add_paragraph(f"{frame}: {importance}")
124
 
125
  doc_io = io.BytesIO()
126
  doc.save(doc_io)
127
  doc_io.seek(0)
 
128
  return doc_io
129
 
130
# Streamlit UI
st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')

st.write("Enter text or upload a DOCX file for analysis:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

output_data = {}


def _analyze_caption(text):
    """Run the full analysis pipeline on one caption and return its result row."""
    hashtags = extract_hashtags(text)  # computed once; reused for the count below
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text)
    }


if input_text:
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Text analysis completed.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = _analyze_caption(text)
    st.success("DOCX file analysis completed.")

if output_data:
    docx_file = generate_docx(output_data)
    # mime restored: without it the browser may mishandle the .docx download
    st.download_button(
        "Download Analysis as DOCX",
        data=docx_file,
        file_name="analysis.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )