Update app.py
app.py CHANGED
@@ -9,7 +9,7 @@ import io
 # Download required NLTK resources
 nltk.download('punkt')
 
-#
+# Tone categories
 tone_categories = {
     "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
     "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -24,7 +24,7 @@ tone_categories = {
     "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
 }
 
-#
+# Frame categories
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
     "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
@@ -47,43 +47,48 @@ frame_categories = {
 def detect_language(text):
     try:
         return detect(text)
-    except Exception as e:
-        st.write(f"Error detecting language: {e}")
+    except:
         return "unknown"
 
-#
-def analyze_tone(text):
-    detected_tones =
+# Extract tone
+def extract_tone(text):
+    detected_tones = []
     for category, keywords in tone_categories.items():
-        if any(
-            detected_tones.
-
-# Extract frames
+        if any(keyword in text.lower() for keyword in keywords):
+            detected_tones.append(category)
+    return detected_tones if detected_tones else ["Neutral"]
+
+# Categorize frames based on importance
+def categorize_frame_importance(text, keywords):
+    keyword_count = sum(text.lower().count(keyword) for keyword in keywords)
+    if keyword_count > 2:
+        return "Major Focus"
+    elif keyword_count == 1 or keyword_count == 2:
+        return "Significant Focus"
+    else:
+        return "Minor Mention"
+
+# Extract frames with categorization
 def extract_frames(text):
-    detected_frames =
+    detected_frames = {}
     for category, keywords in frame_categories.items():
+        importance = categorize_frame_importance(text, keywords)
+        if importance != "Minor Mention":
+            detected_frames[category] = importance
 
     if not detected_frames:
         frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
+        for label in model_result["labels"][:2]:  # Top 2 frames
+            detected_frames[label] = "Significant Focus"
 
-    return
+    return detected_frames
+
+# Extract hashtags
+def extract_hashtags(text):
+    return re.findall(r"#\w+", text)
 
-# Extract captions from DOCX
+# Extract captions from DOCX
 def extract_captions_from_docx(docx_file):
     doc = Document(docx_file)
     captions = {}
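The new helpers are plain keyword matching over the dictionaries defined above, so their behavior is easy to spot-check in isolation. A minimal sketch, assuming app.py's definitions are in scope; the sample strings and expected results are illustrative, not part of the commit:

sample = "Urgent: the crisis deepens, but there is faith in a better future."

# extract_tone does case-insensitive substring matching per category, so the
# result should include at least "Emotional" ("urgent", "crisis") and
# "Hopeful" ("better future"); other categories may match as well.
print(extract_tone(sample))

# categorize_frame_importance counts substring occurrences across a category's
# keyword list: three or more hits -> "Major Focus", one or two ->
# "Significant Focus", zero -> "Minor Mention".
print(categorize_frame_importance("rights, rights, and rights again", ["rights"]))  # Major Focus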
@@ -98,7 +103,7 @@ def extract_captions_from_docx(docx_file):
 
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
 
-# Generate a DOCX file
+# Generate a DOCX file
 def generate_docx(output_data):
     doc = Document()
     doc.add_heading('Activism Message Analysis', 0)
@@ -114,87 +119,51 @@ def generate_docx(output_data):
         doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
 
         doc.add_heading('Frames:', level=2)
-        for frame in result['Frames']:
-            doc.add_paragraph(frame)
+        for frame, importance in result['Frames'].items():
+            doc.add_paragraph(f"{frame}: {importance}")
 
     doc_io = io.BytesIO()
     doc.save(doc_io)
     doc_io.seek(0)
-
     return doc_io
 
-# Streamlit
-st.title('AI-Powered Activism Message Analyzer with
+# Streamlit UI
+st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
 
-st.write("Enter
+st.write("Enter text or upload a DOCX file for analysis:")
 
-# Text
+# Text input
 input_text = st.text_area("Input Text", height=200)
 
-# File
+# File upload
 uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
 
-# Initialize output dictionary
 output_data = {}
 
 if input_text:
-    language = detect_language(input_text)
-    tone = analyze_tone(input_text)
-    hashtags = extract_hashtags(input_text)
-    frames = extract_frames(input_text)
-
     output_data["Manual Input"] = {
         'Full Caption': input_text,
-        'Language': language,
-        'Tone of Caption': tone,
-        'Hashtags': hashtags,
-        'Hashtag Count': len(hashtags),
-        'Frames': frames
+        'Language': detect_language(input_text),
+        'Tone of Caption': extract_tone(input_text),
+        'Hashtags': extract_hashtags(input_text),
+        'Hashtag Count': len(extract_hashtags(input_text)),
+        'Frames': extract_frames(input_text)
     }
-
-    st.success("Analysis completed for text input.")
+    st.success("Text analysis completed.")
 
 if uploaded_file:
     captions = extract_captions_from_docx(uploaded_file)
     for caption, text in captions.items():
-        language = detect_language(text)
-        tone = analyze_tone(text)
-        hashtags = extract_hashtags(text)
-        frames = extract_frames(text)
-
        output_data[caption] = {
             'Full Caption': text,
-            'Language': language,
-            'Tone of Caption': tone,
-            'Hashtags': hashtags,
-            'Hashtag Count': len(hashtags),
-            'Frames': frames
+            'Language': detect_language(text),
+            'Tone of Caption': extract_tone(text),
+            'Hashtags': extract_hashtags(text),
+            'Hashtag Count': len(extract_hashtags(text)),
+            'Frames': extract_frames(text)
         }
+    st.success("DOCX file analysis completed.")
 
-    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
-
-# Display results
 if output_data:
-    with st.expander("Generated Output"):
-        st.subheader("Analysis Results")
-        for index, (caption, result) in enumerate(output_data.items(), start=1):
-            st.write(f"### {index}. {caption}")
-            st.write("**Full Caption:**")
-            st.write(f"> {result['Full Caption']}")
-            st.write(f"**Language**: {result['Language']}")
-            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
-            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
-            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
-            st.write("**Frames**:")
-            for frame in result['Frames']:
-                st.write(f"- {frame}")
-
     docx_file = generate_docx(output_data)
-
-    if docx_file:
-        st.download_button(
-            label="Download Analysis as DOCX",
-            data=docx_file,
-            file_name="activism_message_analysis.docx",
-            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-        )
+    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")
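One caveat in the new extract_frames: the facebook/bart-large-mnli pipeline is constructed inside the function, so the model is reloaded on every call that falls through to the zero-shot branch. A possible refinement, sketched under the assumption of a recent Streamlit (st.cache_resource is available) and with get_frame_model as a hypothetical helper name:

import streamlit as st
from transformers import pipeline

@st.cache_resource  # construct the zero-shot classifier once per process
def get_frame_model():
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Inside extract_frames, the inline pipeline(...) call would then become:
#     frame_model = get_frame_model()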