ahm14 committed on
Commit
bba1b37
·
verified ·
1 Parent(s): 4a942c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -205
app.py CHANGED
@@ -46,198 +46,26 @@ tone_categories = {
46
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
47
  }
48
 
49
# Predefined frame categories for analysis: main category -> subcategory -> keyword list.
frame_categories = {
    "Human Rights & Justice": {
        "Legal Rights & Reforms": ["law", "justice", "legal", "reforms", "legislation", "rights", "human rights", "court", "trial", "lawsuit", "due process"],
        "Humanitarian Issues": ["humanitarian", "aid", "refugees", "asylum", "crisis response", "displacement", "famine", "disaster relief", "war victims", "NGO support"],
        "Civil Liberties": ["freedom", "expression", "privacy", "rights violations", "censorship", "surveillance", "press freedom", "free speech", "whistleblower"],
        "State Repression & Human Rights Abuses": ["police brutality", "enforced disappearances", "political prisoners", "arbitrary arrests", "martial law", "crackdowns"],
        "Women's Rights": [
            "gender equality", "women's empowerment", "reproductive rights", "gender-based violence", "sexual harassment", "domestic violence",
            "equal pay", "education for women", "child marriage", "women's health", "maternity leave", "women in leadership", "honor killings",
            "karo kari", "patriarchal oppression", "honor-based violence", "marital violence", "violence against women", "justice for women",
            "reclaiming women's rights", "female autonomy", "societal control over women", "women's freedom of choice", "women’s bodies, women’s rights",
            "end honor killings", "violence against women must stop", "say no to patriarchy"
        ]
    },
    "Political & State Accountability": {
        "Corruption & Governance": ["corruption", "government", "policy", "accountability", "transparency", "bribery", "misuse of power", "scandal", "nepotism", "tax fraud"],
        "Political Oppression": ["authoritarianism", "censorship", "state control", "dissent", "political prisoners", "dictatorship", "crackdown", "enforced disappearances"],
        "Elections & Political Representation": ["voting", "elections", "political participation", "democracy", "voter suppression", "fraud", "ballot", "electoral reform"],
        "Elite Impunity & Judicial Injustice": ["class privilege", "unfair trials", "elite criminals", "unpunished crimes", "legal double standards"]
    },
    "Gender & Patriarchy": {
        "Gender-Based Violence": ["violence", "domestic abuse", "sexual harassment", "femicide", "sexual assault", "stalking", "forced marriage", "honor killings"],
        "Women's Rights & Equality": ["gender equality", "feminism", "reproductive rights", "patriarchy", "pay gap", "maternal health", "women’s leadership"],
        "LGBTQ+ Rights": ["queer rights", "LGBTQ+", "gender identity", "trans rights", "homophobia", "pride", "same-sex marriage", "conversion therapy", "non-binary"],
        "Gender & Healthcare Rights": ["gynecologic health", "menstrual health", "reproductive justice", "medical discrimination", "healthcare access"]
    },
    "Religious Freedom & Persecution": {
        "Religious Discrimination": ["persecution", "intolerance", "sectarianism", "faith-based violence", "hate crime", "blasphemy", "religious hate speech"],
        "Religious Minorities' Rights": ["minorities", "blasphemy laws", "religious freedom", "forced conversion", "places of worship", "religious refugees"],
        "Misuse of Blasphemy Laws": ["false accusations", "extrajudicial killings", "mob violence", "sectarian extremism", "judicial bias"],
        "Forced Conversions & Marriages": ["child abduction", "coercion", "underage marriages", "religious conversion abuse"]
    },
    "Grassroots Mobilization": {
        "Community Activism": ["activism", "grassroots", "volunteering", "local organizing", "community engagement", "mutual aid", "citizen action"],
        "Protests & Demonstrations": ["march", "strike", "rally", "sit-in", "boycott", "civil disobedience", "public gathering"],
        "Coalition Building": ["solidarity", "collaboration", "alliances", "mutual aid", "networking", "joint statement", "collective movement"]
    },
    "Student Activism": {
        "Student & Youth Activism": ["student protests", "university activism", "campus safety", "fee hikes", "student unions", "feminist student movements"]
    },
    "Environmental Crisis & Activism": {
        "Climate Change Awareness": ["climate crisis", "global warming", "carbon emissions", "fossil fuels", "sea level rise", "heatwaves", "melting ice caps"],
        "Conservation & Sustainability": ["deforestation", "wildlife protection", "biodiversity", "reforestation", "green energy", "sustainable agriculture"],
        "Environmental Justice": ["pollution", "water crisis", "land rights", "indigenous rights", "eco-activism", "environmental racism", "waste management"],
        "Natural Resource Exploitation & Displacement": ["mining", "deforestation", "water mismanagement", "corporate environmental harm", "land grabs"]
    },
    "Anti-Extremism & Anti-Violence": {
        "Hate Speech & Radicalization": ["hate speech", "extremism", "online radicalization", "propaganda", "far-right groups", "hate groups"],
        "Mob & Sectarian Violence": ["mob attack", "lynching", "sectarian violence", "hate crimes", "communal riots", "armed militia"],
        "Counterterrorism & De-Radicalization": ["terrorism", "prevention", "peacebuilding", "rehabilitation", "extremist ideology", "security policy"]
    },
    "Social Inequality & Economic Disparities": {
        "Class Privilege & Labor Rights": ["classism", "labor rights", "unions", "wage gap", "worker exploitation", "fair wages", "labor strikes"],
        "Poverty & Economic Justice": ["poverty", "inequality", "economic disparity", "wealth gap", "unemployment", "food insecurity"],
        "Housing & Healthcare": ["housing crisis", "healthcare access", "social safety nets", "homelessness", "Medicaid", "affordable housing"],
        "Marginalized Labor Rights": ["sanitation workers", "job discrimination", "workplace abuse", "hazardous labor conditions", "fair employment"]
    },
    "Activism & Advocacy": {
        "Policy Advocacy & Legal Reforms": ["campaign", "policy change", "legal advocacy", "legislative reform", "policy shift", "lobbying"],
        "Social Media Activism": ["hashtags", "digital activism", "awareness campaign", "viral movement", "online protest", "cyber activism"],
        "Freedom of Expression & Press": ["press freedom", "censorship", "media rights", "journalist safety", "fake news", "whistleblowing"]
    },
    "Systemic Oppression": {
        "Marginalized Communities": ["minorities", "exclusion", "systemic discrimination", "oppression", "intersectionality"],
        "Racial & Ethnic Discrimination": ["racism", "xenophobia", "ethnic cleansing", "casteism", "racial profiling", "hate speech"],
        "Institutional Bias": ["institutional racism", "structural oppression", "biased laws", "discriminatory policies"]
    },
    "Intersectionality": {
        "Multiple Oppressions": ["overlapping struggles", "intersecting identities", "double discrimination", "marginalization"],
        "Women & Marginalized Identities": ["feminism", "queer feminism", "minority women", "disabled women", "indigenous women"],
        "Global Solidarity Movements": ["transnational activism", "cross-movement solidarity", "international justice"]
    },
    "Call to Action": {
        "Petitions & Direct Action": ["sign petition", "protest", "boycott", "demonstrate", "advocate"],
        "Fundraising & Support": ["donate", "crowdfunding", "aid support", "mutual aid funds", "relief efforts"],
        "Policy & Legislative Action": ["policy change", "demand action", "write to lawmakers", "call your representative"]
    },
    "Empowerment & Resistance": {
        "Grassroots Organizing": ["community empowerment", "leadership training", "civil resistance", "community building"],
        "Revolutionary Movements": ["resistance", "revolt", "revolutionary change", "radical change", "freedom fighters"],
        "Inspiration & Motivational Messaging": ["hope", "courage", "overcoming struggles", "empowerment", "transformative justice"]
    },
    "Climate Justice": {
        "Indigenous Environmental Activism": ["land rights", "indigenous climate leadership", "tribal land protection", "environmental sovereignty"],
        "Corporate Accountability": ["big oil", "corporate greed", "environmental negligence", "corporate responsibility"],
        "Sustainable Development": ["eco-friendly", "renewable energy", "circular economy", "climate resilience"]
    },
    "Human Rights Advocacy": {
        "Criminal Justice Reform": ["police brutality", "wrongful convictions", "prison reform", "mass incarceration"],
        "Workplace Discrimination & Labor Rights": ["workplace bias", "equal pay", "unions", "workplace harassment"],
        "International Human Rights": ["humanitarian law", "UN declarations", "international treaties", "human rights violations"]
    }
}
143
 
144
- # --------------------------
145
- # Helper function to add a frame analysis table to the current post's section
146
- # --------------------------
147
-
148
def add_frame_analysis_table(doc, full_text, frame_categories):
    """Append a 5-column frame-analysis table for one post to *doc*.

    Keyword occurrences are counted per "Main → Sub" frame label; the
    highest-count frame is ticked "Major Focus", the next two detected
    frames "Significant Focus", any other detected frame "Minor Mention",
    and zero-count frames "Not Applicable". The table is written directly
    into the supplied python-docx document.
    """
    text_lower = full_text.lower()

    # Count whole-word keyword hits for every "Main → Sub" frame label.
    frame_counts = {}
    for main_cat, subcats in frame_categories.items():
        for subcat, keywords in subcats.items():
            label = f"{main_cat} → {subcat}"
            frame_counts[label] = sum(
                len(re.findall(r'\b' + re.escape(kw.lower()) + r'\b', text_lower))
                for kw in keywords
            )

    # Rank detected frames (count > 0) by descending count; stable sort
    # keeps insertion order among ties, matching the original behavior.
    ranked = sorted(
        ((frame, count) for frame, count in frame_counts.items() if count > 0),
        key=lambda item: item[1],
        reverse=True,
    )
    frame_focus = {}
    for position, (frame, _count) in enumerate(ranked):
        if position == 0:
            frame_focus[frame] = "Major Focus"
        elif position < 3:
            frame_focus[frame] = "Significant Focus"
        else:
            frame_focus[frame] = "Minor Mention"
    # Frames that never matched become "Not Applicable".
    for frame, count in frame_counts.items():
        if count == 0:
            frame_focus[frame] = "Not Applicable"

    # Emit the table: header row plus one row per frame (alphabetical).
    doc.add_paragraph("Frame Analysis:")
    table = doc.add_table(rows=1, cols=5)
    table.style = 'Table Grid'
    levels = ["Major Focus", "Significant Focus", "Minor Mention", "Not Applicable"]
    header_cells = table.rows[0].cells
    header_cells[0].text = "Frame"
    for idx, level in enumerate(levels, start=1):
        header_cells[idx].text = level
    for frame in sorted(frame_focus):
        assigned = frame_focus[frame]
        row_cells = table.add_row().cells
        row_cells[0].text = frame
        for idx, level in enumerate(levels, start=1):
            # Tick only the assigned focus column; others repeat the label.
            row_cells[idx].text = f"✔ {level}" if assigned == level else level
205
-
206
- # --------------------------
207
- # Merged DOCX creation function with inline frame analysis for each post
208
- # --------------------------
209
-
210
def create_merged_docx(extracted_data, frame_categories):
    """Build a DOCX where each post's details are followed by an inline
    frame-analysis table.

    For every post in *extracted_data*, writes a level-1 heading, then the
    known fields in a fixed order ("N/A" when missing; list-valued Tone and
    Hashtags are comma-joined), and finally — when a "Full Caption" exists —
    the frame-analysis table computed from it. Returns the Document.
    """
    # Fixed field order for every post section (no textual "Frames" field —
    # the analysis table replaces it).
    ordered_keys = [
        "Post Number", "Date of Post", "Media Type", "Number of Pictures",
        "Number of Videos", "Number of Audios", "Likes", "Comments",
        "Tagged Audience", "Full Caption", "Language", "Tone", "Hashtags"
    ]
    doc = Document()
    for post_number, data in extracted_data.items():
        doc.add_heading(post_number, level=1)
        for key in ordered_keys:
            value = data.get(key, "N/A")
            if key in ("Tone", "Hashtags") and isinstance(value, list):
                value = ", ".join(value)
            doc.add_paragraph(f"**{key}:** {value}")
        # Inline frame analysis replaces a textual Frames field.
        caption_text = data.get("Full Caption", "")
        if caption_text:
            add_frame_analysis_table(doc, caption_text, frame_categories)
        doc.add_paragraph("\n")
    return doc
236
-
237
- # --------------------------
238
- # Other utility functions remain unchanged (language detection, tone extraction, etc.)
239
- # --------------------------
240
-
241
  def detect_language(text):
242
  try:
243
  return detect(text)
@@ -245,17 +73,17 @@ def detect_language(text):
245
  logging.error(f"Error detecting language: {e}")
246
  return "unknown"
247
 
 
248
  def extract_tone(text):
249
  try:
250
- response = llm.chat([
251
- {"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
252
- {"role": "user", "content": text}
253
- ])
254
  return response["choices"][0]["message"]["content"].split(", ")
255
  except Exception as e:
256
  logging.error(f"Groq API error: {e}")
257
  return extract_tone_fallback(text)
258
 
 
259
  def extract_tone_fallback(text):
260
  detected_tones = set()
261
  text_lower = text.lower()
@@ -264,9 +92,73 @@ def extract_tone_fallback(text):
264
  detected_tones.add(category)
265
  return list(detected_tones) if detected_tones else ["Neutral"]
266
 
 
267
def extract_hashtags(text):
    """Return every '#word' hashtag token in *text*, in order of appearance."""
    hashtag_pattern = re.compile(r"#\w+")
    return hashtag_pattern.findall(text)
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  def extract_captions_from_docx(docx_file):
271
  doc = Document(docx_file)
272
  captions = {}
@@ -280,6 +172,7 @@ def extract_captions_from_docx(docx_file):
280
  captions[current_post].append(text)
281
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
282
 
 
283
  def extract_metadata_from_excel(excel_file):
284
  try:
285
  df = pd.read_excel(excel_file)
@@ -289,18 +182,38 @@ def extract_metadata_from_excel(excel_file):
289
  logging.error(f"Error processing Excel file: {e}")
290
  return []
291
 
 
292
  def merge_metadata_with_generated_data(generated_data, excel_metadata):
293
  for post_data in excel_metadata:
294
  post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
295
  if post_number in generated_data:
296
  generated_data[post_number].update(post_data)
297
  else:
298
- generated_data[post_number] = post_data
299
  return generated_data
300
 
301
- # --------------------------
302
- # Streamlit App
303
- # --------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
st.title("AI-Powered Coding Sheet Generator")
st.write("Enter text or upload a DOCX/Excel file for analysis:")

uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])

output_data = {}

if input_text:
    # Analyze the manually entered text.
    output_data["Manual Input"] = {
        "Full Caption": input_text,
        "Language": detect_language(input_text),
        "Tone": extract_tone(input_text),
        "Hashtags": extract_hashtags(input_text),
        "Frames": None,  # frame analysis is computed inline at DOCX export
    }

if uploaded_docx:
    # Analyze every caption extracted from the uploaded DOCX.
    for caption, text in extract_captions_from_docx(uploaded_docx).items():
        output_data[caption] = {
            "Full Caption": text,
            "Language": detect_language(text),
            "Tone": extract_tone(text),
            "Hashtags": extract_hashtags(text),
            "Frames": None,  # inline frame analysis will be added
        }

if uploaded_excel:
    output_data = merge_metadata_with_generated_data(
        output_data, extract_metadata_from_excel(uploaded_excel)
    )

if output_data:
    # Show each post in a collapsible section.
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
                st.write(f"**{key}:** {value}")
    # Offer the merged analysis as a downloadable DOCX.
    buffer = io.BytesIO()
    create_merged_docx(output_data, frame_categories).save(buffer)
    buffer.seek(0)
    st.download_button("Download Merged Analysis DOCX", data=buffer, file_name="merged_analysis.docx")
 
 
 
 
 
 
 
46
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
47
  }
48
 
49
# Frame categories for the fallback (keyword-based) frame detection.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
}
67
 
68
+ # Detect language
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def detect_language(text):
70
  try:
71
  return detect(text)
 
73
  logging.error(f"Error detecting language: {e}")
74
  return "unknown"
75
 
76
# Extract tone using Groq API (or fallback method)
def extract_tone(text):
    """Ask the LLM for descriptive tone labels for *text*.

    Returns a list of tone labels parsed from the model's comma-separated
    reply; on any API failure, logs the error and falls back to the
    keyword-based extractor.
    """
    messages = [
        {"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
        {"role": "user", "content": text},
    ]
    try:
        response = llm.chat(messages)
        return response["choices"][0]["message"]["content"].split(", ")
    except Exception as e:
        logging.error(f"Groq API error: {e}")
        return extract_tone_fallback(text)
85
 
86
+ # Fallback method for tone extraction
87
  def extract_tone_fallback(text):
88
  detected_tones = set()
89
  text_lower = text.lower()
 
92
  detected_tones.add(category)
93
  return list(detected_tones) if detected_tones else ["Neutral"]
94
 
95
# Extract hashtags
def extract_hashtags(text):
    """Return the list of '#word' tokens found in *text* (left-to-right)."""
    matches = re.findall(r"#\w+", text)
    return matches
98
 
99
+ # -------------------------------------------------------------------
100
+ # New functions for frame categorization and display
101
+ # -------------------------------------------------------------------
102
+
103
def get_frame_category_mapping(text, categories=None):
    """Map every frame (from *categories*) to one of four focus levels.

    Detected frames are ranked by how many of their keywords occur in
    *text* (case-insensitive, whole-word/phrase matches):
      - top detected frame: "Major Focus"
      - next up to two:     "Significant Focus"
      - remaining detected: "Minor Mention"
    Frames with no keyword hit get "Not Applicable".

    Parameters:
        text: the caption/post text to scan.
        categories: optional frame -> keyword-list dict; defaults to the
            module-level ``frame_categories`` (backward compatible).

    Returns:
        dict mapping every frame name to one of the four focus labels.
    """
    if categories is None:
        categories = frame_categories
    text_lower = text.lower()

    # Count keyword hits per frame. Word-boundary matching avoids false
    # positives from bare substring tests (e.g. "law" inside "flawed").
    frame_freq = {
        frame: sum(
            1 for kw in keywords
            if re.search(r"\b" + re.escape(kw.lower()) + r"\b", text_lower)
        )
        for frame, keywords in categories.items()
    }

    # Detected frames (frequency > 0), highest frequency first; the stable
    # sort preserves dict insertion order among ties.
    detected = sorted(
        ((frame, freq) for frame, freq in frame_freq.items() if freq > 0),
        key=lambda item: item[1],
        reverse=True,
    )

    category_mapping = {}
    for rank, (frame, _freq) in enumerate(detected):
        if rank == 0:
            category_mapping[frame] = "Major Focus"
        elif rank < 3:
            category_mapping[frame] = "Significant Focus"
        else:
            category_mapping[frame] = "Minor Mention"
    # Everything not detected is "Not Applicable".
    for frame in categories:
        category_mapping.setdefault(frame, "Not Applicable")
    return category_mapping
138
+
139
def format_frame_categories_table(mapping):
    """Render *mapping* (frame -> focus level) as a markdown table.

    The table has one row per frame with four focus-level columns:
    Major Focus, Significant Focus, Minor Mention, Not Applicable.
    A tick (✓) appears only in the column matching the assigned level.
    """
    levels = ["Major Focus", "Significant Focus", "Minor Mention", "Not Applicable"]
    lines = [
        "| Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable |",
        "| --- | --- | --- | --- | --- |",
    ]
    for frame, assigned in mapping.items():
        cells = ["✓" if assigned == level else "" for level in levels]
        lines.append(f"| {frame} | {cells[0]} | {cells[1]} | {cells[2]} | {cells[3]} |")
    return "\n".join(lines) + "\n"
156
+
157
+ # -------------------------------------------------------------------
158
+ # Existing functions for file processing
159
+ # -------------------------------------------------------------------
160
+
161
+ # Extract captions from DOCX
162
  def extract_captions_from_docx(docx_file):
163
  doc = Document(docx_file)
164
  captions = {}
 
172
  captions[current_post].append(text)
173
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
174
 
175
+ # Extract metadata from Excel file
176
  def extract_metadata_from_excel(excel_file):
177
  try:
178
  df = pd.read_excel(excel_file)
 
182
  logging.error(f"Error processing Excel file: {e}")
183
  return []
184
 
185
# Merge metadata with generated analysis
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    """Fold Excel-derived rows into *generated_data*, keyed as "Post N".

    Rows whose post key already exists update that entry in place; unseen
    posts are inserted as-is. Rows lacking a 'Post Number' get the next
    sequential number. Returns the (mutated) *generated_data* dict.
    """
    for row in excel_metadata:
        key = f"Post {row.get('Post Number', len(generated_data) + 1)}"
        if key in generated_data:
            generated_data[key].update(row)
        else:
            generated_data[key] = row
    return generated_data
194
 
195
# Create DOCX file matching the uploaded format
def create_docx_from_data(extracted_data):
    """Build a DOCX document mirroring the uploaded coding-sheet layout.

    Writes one level-1 heading per post, followed by "**Key:** value"
    paragraphs in a fixed key order. Missing keys render as "N/A";
    list-valued Tone and Hashtags are comma-joined; the Frames value
    (a markdown table string) is written as-is. Returns the python-docx
    Document for the caller to save.
    """
    # Fixed field order — hoisted out of the per-post loop; the original
    # rebuilt this constant list on every iteration.
    ordered_keys = [
        "Post Number", "Date of Post", "Media Type", "Number of Pictures",
        "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
        "Full Caption", "Language", "Tone", "Hashtags", "Frames"
    ]
    doc = Document()
    for post_number, data in extracted_data.items():
        doc.add_heading(post_number, level=1)
        for key in ordered_keys:
            value = data.get(key, "N/A")
            if key in ["Tone", "Hashtags"]:
                value = ", ".join(value) if isinstance(value, list) else value
            # For Frames, simply add the table text as is.
            doc.add_paragraph(f"**{key}:** {value}")
        doc.add_paragraph("\n")
    return doc
213
+
214
+ # -------------------------------------------------------------------
215
+ # Streamlit App UI
216
+ # -------------------------------------------------------------------
217
 
218
st.title("AI-Powered Coding Sheet Generator")
st.write("Enter text or upload a DOCX/Excel file for analysis:")

uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])

output_data = {}

if input_text:
    # Process manual input text.
    mapping = get_frame_category_mapping(input_text)
    output_data["Manual Input"] = {
        "Full Caption": input_text,
        "Language": detect_language(input_text),
        "Tone": extract_tone(input_text),
        "Hashtags": extract_hashtags(input_text),
        "Frames": format_frame_categories_table(mapping),  # markdown frame table
    }

if uploaded_docx:
    # Process every caption found in the uploaded DOCX.
    for caption, text in extract_captions_from_docx(uploaded_docx).items():
        mapping = get_frame_category_mapping(text)
        output_data[caption] = {
            "Full Caption": text,
            "Language": detect_language(text),
            "Tone": extract_tone(text),
            "Hashtags": extract_hashtags(text),
            "Frames": format_frame_categories_table(mapping),
        }

if uploaded_excel:
    excel_metadata = extract_metadata_from_excel(uploaded_excel)
    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)

# Display results in collapsible sections; the Frames value is markdown.
if output_data:
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
                if key == "Frames":
                    st.markdown(f"**{key}:**\n{value}")
                else:
                    st.write(f"**{key}:** {value}")

# Generate DOCX output for download.
if output_data:
    docx_io = io.BytesIO()
    create_docx_from_data(output_data).save(docx_io)
    docx_io.seek(0)
    st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="coding_sheet.docx")