Update app.py
app.py CHANGED
@@ -46,198 +46,26 @@ tone_categories = {
    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
}

-#
frame_categories = {
-    "Human Rights & Justice":
-        ...
-        "Political Oppression": ["authoritarianism", "censorship", "state control", "dissent", "political prisoners", "dictatorship", "crackdown", "enforced disappearances"],
-        "Elections & Political Representation": ["voting", "elections", "political participation", "democracy", "voter suppression", "fraud", "ballot", "electoral reform"],
-        "Elite Impunity & Judicial Injustice": ["class privilege", "unfair trials", "elite criminals", "unpunished crimes", "legal double standards"]
-    },
-    "Gender & Patriarchy": {
-        "Gender-Based Violence": ["violence", "domestic abuse", "sexual harassment", "femicide", "sexual assault", "stalking", "forced marriage", "honor killings"],
-        "Women's Rights & Equality": ["gender equality", "feminism", "reproductive rights", "patriarchy", "pay gap", "maternal health", "women’s leadership"],
-        "LGBTQ+ Rights": ["queer rights", "LGBTQ+", "gender identity", "trans rights", "homophobia", "pride", "same-sex marriage", "conversion therapy", "non-binary"],
-        "Gender & Healthcare Rights": ["gynecologic health", "menstrual health", "reproductive justice", "medical discrimination", "healthcare access"]
-    },
-    "Religious Freedom & Persecution": {
-        "Religious Discrimination": ["persecution", "intolerance", "sectarianism", "faith-based violence", "hate crime", "blasphemy", "religious hate speech"],
-        "Religious Minorities' Rights": ["minorities", "blasphemy laws", "religious freedom", "forced conversion", "places of worship", "religious refugees"],
-        "Misuse of Blasphemy Laws": ["false accusations", "extrajudicial killings", "mob violence", "sectarian extremism", "judicial bias"],
-        "Forced Conversions & Marriages": ["child abduction", "coercion", "underage marriages", "religious conversion abuse"]
-    },
-    "Grassroots Mobilization": {
-        "Community Activism": ["activism", "grassroots", "volunteering", "local organizing", "community engagement", "mutual aid", "citizen action"],
-        "Protests & Demonstrations": ["march", "strike", "rally", "sit-in", "boycott", "civil disobedience", "public gathering"],
-        "Coalition Building": ["solidarity", "collaboration", "alliances", "mutual aid", "networking", "joint statement", "collective movement"]
-    },
-    "Student Activism": {
-        "Student & Youth Activism": ["student protests", "university activism", "campus safety", "fee hikes", "student unions", "feminist student movements"]
-    },
-    "Environmental Crisis & Activism": {
-        "Climate Change Awareness": ["climate crisis", "global warming", "carbon emissions", "fossil fuels", "sea level rise", "heatwaves", "melting ice caps"],
-        "Conservation & Sustainability": ["deforestation", "wildlife protection", "biodiversity", "reforestation", "green energy", "sustainable agriculture"],
-        "Environmental Justice": ["pollution", "water crisis", "land rights", "indigenous rights", "eco-activism", "environmental racism", "waste management"],
-        "Natural Resource Exploitation & Displacement": ["mining", "deforestation", "water mismanagement", "corporate environmental harm", "land grabs"]
-    },
-    "Anti-Extremism & Anti-Violence": {
-        "Hate Speech & Radicalization": ["hate speech", "extremism", "online radicalization", "propaganda", "far-right groups", "hate groups"],
-        "Mob & Sectarian Violence": ["mob attack", "lynching", "sectarian violence", "hate crimes", "communal riots", "armed militia"],
-        "Counterterrorism & De-Radicalization": ["terrorism", "prevention", "peacebuilding", "rehabilitation", "extremist ideology", "security policy"]
-    },
-    "Social Inequality & Economic Disparities": {
-        "Class Privilege & Labor Rights": ["classism", "labor rights", "unions", "wage gap", "worker exploitation", "fair wages", "labor strikes"],
-        "Poverty & Economic Justice": ["poverty", "inequality", "economic disparity", "wealth gap", "unemployment", "food insecurity"],
-        "Housing & Healthcare": ["housing crisis", "healthcare access", "social safety nets", "homelessness", "Medicaid", "affordable housing"],
-        "Marginalized Labor Rights": ["sanitation workers", "job discrimination", "workplace abuse", "hazardous labor conditions", "fair employment"]
-    },
-    "Activism & Advocacy": {
-        "Policy Advocacy & Legal Reforms": ["campaign", "policy change", "legal advocacy", "legislative reform", "policy shift", "lobbying"],
-        "Social Media Activism": ["hashtags", "digital activism", "awareness campaign", "viral movement", "online protest", "cyber activism"],
-        "Freedom of Expression & Press": ["press freedom", "censorship", "media rights", "journalist safety", "fake news", "whistleblowing"]
-    },
-    "Systemic Oppression": {
-        "Marginalized Communities": ["minorities", "exclusion", "systemic discrimination", "oppression", "intersectionality"],
-        "Racial & Ethnic Discrimination": ["racism", "xenophobia", "ethnic cleansing", "casteism", "racial profiling", "hate speech"],
-        "Institutional Bias": ["institutional racism", "structural oppression", "biased laws", "discriminatory policies"]
-    },
-    "Intersectionality": {
-        "Multiple Oppressions": ["overlapping struggles", "intersecting identities", "double discrimination", "marginalization"],
-        "Women & Marginalized Identities": ["feminism", "queer feminism", "minority women", "disabled women", "indigenous women"],
-        "Global Solidarity Movements": ["transnational activism", "cross-movement solidarity", "international justice"]
-    },
-    "Call to Action": {
-        "Petitions & Direct Action": ["sign petition", "protest", "boycott", "demonstrate", "advocate"],
-        "Fundraising & Support": ["donate", "crowdfunding", "aid support", "mutual aid funds", "relief efforts"],
-        "Policy & Legislative Action": ["policy change", "demand action", "write to lawmakers", "call your representative"]
-    },
-    "Empowerment & Resistance": {
-        "Grassroots Organizing": ["community empowerment", "leadership training", "civil resistance", "community building"],
-        "Revolutionary Movements": ["resistance", "revolt", "revolutionary change", "radical change", "freedom fighters"],
-        "Inspiration & Motivational Messaging": ["hope", "courage", "overcoming struggles", "empowerment", "transformative justice"]
-    },
-    "Climate Justice": {
-        "Indigenous Environmental Activism": ["land rights", "indigenous climate leadership", "tribal land protection", "environmental sovereignty"],
-        "Corporate Accountability": ["big oil", "corporate greed", "environmental negligence", "corporate responsibility"],
-        "Sustainable Development": ["eco-friendly", "renewable energy", "circular economy", "climate resilience"]
-    },
-    "Human Rights Advocacy": {
-        "Criminal Justice Reform": ["police brutality", "wrongful convictions", "prison reform", "mass incarceration"],
-        "Workplace Discrimination & Labor Rights": ["workplace bias", "equal pay", "unions", "workplace harassment"],
-        "International Human Rights": ["humanitarian law", "UN declarations", "international treaties", "human rights violations"]
-    }
}

-# --------------------------
-# Helper function to add a frame analysis table to the current post's section
-# --------------------------
-
-def add_frame_analysis_table(doc, full_text, frame_categories):
-    """
-    For the given full_text (from a single post), counts keyword occurrences for each frame
-    from frame_categories and creates a table with 5 columns:
-    Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable
-    The highest-count frame is marked as "Major Focus", the next two as "Significant Focus",
-    remaining detected frames with positive counts are "Minor Mention", and frames with zero counts
-    are marked as "Not Applicable".
-    This table is added directly to the provided doc.
-    """
-    # Count keyword occurrences for each frame
-    frame_counts = {}
-    text_lower = full_text.lower()
-    for main_cat, subcats in frame_categories.items():
-        for subcat, keywords in subcats.items():
-            frame_label = f"{main_cat} → {subcat}"
-            count = 0
-            for keyword in keywords:
-                count += len(re.findall(r'\b' + re.escape(keyword.lower()) + r'\b', text_lower))
-            frame_counts[frame_label] = count
-
-    # Determine focus level
-    frames_with_counts = [(frame, count) for frame, count in frame_counts.items() if count > 0]
-    frames_with_counts.sort(key=lambda x: x[1], reverse=True)
-    frame_focus = {}
-    if frames_with_counts:
-        for i, (frame, count) in enumerate(frames_with_counts):
-            if i == 0:
-                frame_focus[frame] = "Major Focus"
-            elif i < 3:
-                frame_focus[frame] = "Significant Focus"
-            else:
-                frame_focus[frame] = "Minor Mention"
-    # Frames with zero count become "Not Applicable"
-    for frame, count in frame_counts.items():
-        if count == 0:
-            frame_focus[frame] = "Not Applicable"
-
-    # Create the table in the current document
-    doc.add_paragraph("Frame Analysis:")
-    table = doc.add_table(rows=1, cols=5)
-    table.style = 'Table Grid'
-    hdr_cells = table.rows[0].cells
-    hdr_cells[0].text = "Frame"
-    hdr_cells[1].text = "Major Focus"
-    hdr_cells[2].text = "Significant Focus"
-    hdr_cells[3].text = "Minor Mention"
-    hdr_cells[4].text = "Not Applicable"
-
-    for frame in sorted(frame_focus.keys()):
-        category = frame_focus[frame]
-        row_cells = table.add_row().cells
-        row_cells[0].text = frame
-        row_cells[1].text = "✔ Major Focus" if category == "Major Focus" else "Major Focus"
-        row_cells[2].text = "✔ Significant Focus" if category == "Significant Focus" else "Significant Focus"
-        row_cells[3].text = "✔ Minor Mention" if category == "Minor Mention" else "Minor Mention"
-        row_cells[4].text = "✔ Not Applicable" if category == "Not Applicable" else "Not Applicable"
-
-# --------------------------
-# Merged DOCX creation function with inline frame analysis for each post
-# --------------------------
-
-def create_merged_docx(extracted_data, frame_categories):
-    """
-    Creates a DOCX document where, for each post, all details (from extracted_data)
-    are displayed. Instead of a textual "Frames" field, an inline frame analysis table
-    (based on the post's "Full Caption" and frame_categories) is inserted.
-    """
-    doc = Document()
-    ordered_keys = [
-        "Post Number", "Date of Post", "Media Type", "Number of Pictures",
-        "Number of Videos", "Number of Audios", "Likes", "Comments",
-        "Tagged Audience", "Full Caption", "Language", "Tone", "Hashtags"
-    ]
-
-    for post_number, data in extracted_data.items():
-        doc.add_heading(post_number, level=1)
-        for key in ordered_keys:
-            value = data.get(key, "N/A")
-            if key in ["Tone", "Hashtags"]:
-                value = ", ".join(value) if isinstance(value, list) else value
-            doc.add_paragraph(f"**{key}:** {value}")
-        # Instead of displaying a textual Frames field, add the frame analysis table here
-        full_text = data.get("Full Caption", "")
-        if full_text:
-            add_frame_analysis_table(doc, full_text, frame_categories)
-        doc.add_paragraph("\n")
-    return doc
-
-# --------------------------
-# Other utility functions remain unchanged (language detection, tone extraction, etc.)
-# --------------------------
-
def detect_language(text):
    try:
        return detect(text)
@@ -245,17 +73,17 @@ def detect_language(text):
        logging.error(f"Error detecting language: {e}")
        return "unknown"

def extract_tone(text):
    try:
-        response = llm.chat([
-            ...
-            {"role": "user", "content": text}
-        ])
        return response["choices"][0]["message"]["content"].split(", ")
    except Exception as e:
        logging.error(f"Groq API error: {e}")
        return extract_tone_fallback(text)

def extract_tone_fallback(text):
    detected_tones = set()
    text_lower = text.lower()
@@ -264,9 +92,73 @@ def extract_tone_fallback(text):
            detected_tones.add(category)
    return list(detected_tones) if detected_tones else ["Neutral"]

def extract_hashtags(text):
    return re.findall(r"#\w+", text)

def extract_captions_from_docx(docx_file):
    doc = Document(docx_file)
    captions = {}
@@ -280,6 +172,7 @@ def extract_captions_from_docx(docx_file):
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}

def extract_metadata_from_excel(excel_file):
    try:
        df = pd.read_excel(excel_file)
@@ -289,18 +182,38 @@ def extract_metadata_from_excel(excel_file):
        logging.error(f"Error processing Excel file: {e}")
        return []

def merge_metadata_with_generated_data(generated_data, excel_metadata):
    for post_data in excel_metadata:
        post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
        if post_number in generated_data:
            generated_data[post_number].update(post_data)
        else:
-            generated_data[post_number] = post_data
    return generated_data

-#
-
-

st.title("AI-Powered Coding Sheet Generator")
st.write("Enter text or upload a DOCX/Excel file for analysis:")
@@ -310,37 +223,50 @@ uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])

output_data = {}

if input_text:
    output_data["Manual Input"] = {
        "Full Caption": input_text,
        "Language": detect_language(input_text),
        "Tone": extract_tone(input_text),
        "Hashtags": extract_hashtags(input_text),
-        "Frames": ...
    }

if uploaded_docx:
    captions = extract_captions_from_docx(uploaded_docx)
    for caption, text in captions.items():
        output_data[caption] = {
            "Full Caption": text,
            "Language": detect_language(text),
            "Tone": extract_tone(text),
            "Hashtags": extract_hashtags(text),
-            "Frames": ...
        }

if uploaded_excel:
    excel_metadata = extract_metadata_from_excel(uploaded_excel)
    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)

if output_data:
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
-                ...

    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
}

+# Frame categories for fallback method
frame_categories = {
+    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
+    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
+    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
+    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
+    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
+    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
+    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
+    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
+    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
+    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
+    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
+    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
+    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
+    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
+    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
}

+# Detect language
def detect_language(text):
    try:
        return detect(text)
...
        logging.error(f"Error detecting language: {e}")
        return "unknown"

+# Extract tone using Groq API (or fallback method)
def extract_tone(text):
    try:
+        response = llm.chat([{"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
+                             {"role": "user", "content": text}])
        return response["choices"][0]["message"]["content"].split(", ")
    except Exception as e:
        logging.error(f"Groq API error: {e}")
        return extract_tone_fallback(text)

+# Fallback method for tone extraction
def extract_tone_fallback(text):
    detected_tones = set()
    text_lower = text.lower()
...
            detected_tones.add(category)
    return list(detected_tones) if detected_tones else ["Neutral"]

+# Extract hashtags
def extract_hashtags(text):
    return re.findall(r"#\w+", text)

+# -------------------------------------------------------------------
+# New functions for frame categorization and display
+# -------------------------------------------------------------------
+
+def get_frame_category_mapping(text):
+    """
+    Returns a mapping of every frame (from frame_categories) to one of the four categories.
+    Detected frames are assigned a focus level based on keyword frequency:
+      - Top detected: "Major Focus"
+      - Next up to two: "Significant Focus"
+      - Remaining detected: "Minor Mention"
+    Frames not detected get "Not Applicable".
+    """
+    text_lower = text.lower()
+    # Calculate frequency for each frame
+    frame_freq = {}
+    for frame, keywords in frame_categories.items():
+        freq = sum(1 for word in keywords if word in text_lower)
+        frame_freq[frame] = freq
+
+    # Identify detected frames (frequency > 0) and sort descending
+    detected = [(frame, freq) for frame, freq in frame_freq.items() if freq > 0]
+    detected.sort(key=lambda x: x[1], reverse=True)
+
+    category_mapping = {}
+    if detected:
+        # Highest frequency frame as Major Focus
+        category_mapping[detected[0][0]] = "Major Focus"
+        # Next up to two frames as Significant Focus
+        for frame, _ in detected[1:3]:
+            category_mapping[frame] = "Significant Focus"
+        # Remaining detected frames as Minor Mention
+        for frame, _ in detected[3:]:
+            category_mapping[frame] = "Minor Mention"
+    # For frames not detected, assign Not Applicable
+    for frame in frame_categories.keys():
+        if frame not in category_mapping:
+            category_mapping[frame] = "Not Applicable"
+    return category_mapping
+
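A quick illustration of the focus-level rule above, using an invented caption (note that matching is plain substring containment on the lowercased text, so "demands" also satisfies the keyword "demand"):

    caption = "Protest march demands justice and climate change action #rights"
    mapping = get_frame_category_mapping(caption)
    # "Activism & Advocacy" matches 5 keywords (justice, rights, demand, protest, march) -> "Major Focus"
    # "Human Rights & Justice" (2 matches) and "Environmental Crisis & Activism" (1 match) -> "Significant Focus"
    # "Climate Justice" (1 match) -> "Minor Mention"; every frame with no match -> "Not Applicable"
    # Ties keep dictionary insertion order, since list.sort is stable.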
+def format_frame_categories_table(mapping):
+    """
+    Returns a markdown-formatted table that displays each frame along with four columns:
+    Major Focus, Significant Focus, Minor Mention, and Not Applicable.
+    A tick (✓) is shown only in the column corresponding to the assigned category.
+    """
+    header = "| Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable |\n"
+    header += "| --- | --- | --- | --- | --- |\n"
+    rows = ""
+    tick = "✓"
+    for frame, category in mapping.items():
+        major = tick if category == "Major Focus" else ""
+        significant = tick if category == "Significant Focus" else ""
+        minor = tick if category == "Minor Mention" else ""
+        not_applicable = tick if category == "Not Applicable" else ""
+        rows += f"| {frame} | {major} | {significant} | {minor} | {not_applicable} |\n"
+    return header + rows
+
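Rendered with st.markdown, the string returned for the mapping sketched above would come out roughly as the table below; rows follow the order in which the mapping was built, detected frames first:

| Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable |
| --- | --- | --- | --- | --- |
| Activism & Advocacy | ✓ |  |  |  |
| Human Rights & Justice |  | ✓ |  |  |
| Environmental Crisis & Activism |  | ✓ |  |  |
| Climate Justice |  |  | ✓ |  |
| Political & State Accountability |  |  |  | ✓ |
| ... (remaining undetected frames) |  |  |  | ✓ |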
+# -------------------------------------------------------------------
+# Existing functions for file processing
+# -------------------------------------------------------------------
+
+# Extract captions from DOCX
def extract_captions_from_docx(docx_file):
    doc = Document(docx_file)
    captions = {}
...
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}

+# Extract metadata from Excel file
def extract_metadata_from_excel(excel_file):
    try:
        df = pd.read_excel(excel_file)
...
        logging.error(f"Error processing Excel file: {e}")
        return []

+# Merge metadata with generated analysis
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    for post_data in excel_metadata:
        post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
        if post_number in generated_data:
            generated_data[post_number].update(post_data)
        else:
+            generated_data[post_number] = post_data
    return generated_data

+# Create DOCX file matching the uploaded format
+def create_docx_from_data(extracted_data):
+    doc = Document()
+    for post_number, data in extracted_data.items():
+        doc.add_heading(post_number, level=1)
+        ordered_keys = [
+            "Post Number", "Date of Post", "Media Type", "Number of Pictures",
+            "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
+            "Full Caption", "Language", "Tone", "Hashtags", "Frames"
+        ]
+        for key in ordered_keys:
+            value = data.get(key, "N/A")
+            if key in ["Tone", "Hashtags"]:
+                value = ", ".join(value) if isinstance(value, list) else value
+            # For Frames, simply add the table text as is.
+            doc.add_paragraph(f"**{key}:** {value}")
+        doc.add_paragraph("\n")
+    return doc
+
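Because create_docx_from_data writes every value with doc.add_paragraph(f"**{key}:** {value}"), the "Frames" markdown table lands in the exported DOCX as literal text (pipes, dashes and asterisks included) rather than as a Word table. If a native table were wanted in the export, the mapping could be rendered with python-docx directly, much like the removed add_frame_analysis_table above; a rough sketch only, where add_frames_table is a hypothetical helper and not part of this change:

    def add_frames_table(doc, mapping):
        # Illustrative only: build a real python-docx table from a frame -> focus-level mapping.
        headers = ["Frame", "Major Focus", "Significant Focus", "Minor Mention", "Not Applicable"]
        table = doc.add_table(rows=1, cols=len(headers))
        table.style = "Table Grid"
        for cell, text in zip(table.rows[0].cells, headers):
            cell.text = text
        for frame, category in mapping.items():
            row = table.add_row().cells
            row[0].text = frame
            # Tick only the column that matches the assigned focus level.
            for i, label in enumerate(headers[1:], start=1):
                row[i].text = "✓" if category == label else ""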
+# -------------------------------------------------------------------
+# Streamlit App UI
+# -------------------------------------------------------------------

st.title("AI-Powered Coding Sheet Generator")
st.write("Enter text or upload a DOCX/Excel file for analysis:")
...
uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])

output_data = {}
+
if input_text:
+    # Process manual input text
+    frame_mapping = get_frame_category_mapping(input_text)
+    frames_table = format_frame_categories_table(frame_mapping)
    output_data["Manual Input"] = {
        "Full Caption": input_text,
        "Language": detect_language(input_text),
        "Tone": extract_tone(input_text),
        "Hashtags": extract_hashtags(input_text),
+        "Frames": frames_table, # Markdown table displaying frame categories
    }

if uploaded_docx:
    captions = extract_captions_from_docx(uploaded_docx)
    for caption, text in captions.items():
+        frame_mapping = get_frame_category_mapping(text)
+        frames_table = format_frame_categories_table(frame_mapping)
        output_data[caption] = {
            "Full Caption": text,
            "Language": detect_language(text),
            "Tone": extract_tone(text),
            "Hashtags": extract_hashtags(text),
+            "Frames": frames_table,
        }

if uploaded_excel:
    excel_metadata = extract_metadata_from_excel(uploaded_excel)
    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)

+# Display results in collapsible sections
if output_data:
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
+                if key == "Frames":
+                    st.markdown(f"**{key}:**\n{value}")
+                else:
+                    st.write(f"**{key}:** {value}")
+
+# Generate DOCX output for download
+if output_data:
+    docx_output = create_docx_from_data(output_data)
+    docx_io = io.BytesIO()
+    docx_output.save(docx_io)
+    docx_io.seek(0)
+    st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="coding_sheet.docx")