ahm14 committed on
Commit
e38265e
·
verified ·
1 Parent(s): 3b8e826

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -35
app.py CHANGED
@@ -98,9 +98,9 @@ def categorize_frames(frame_list):
98
  sorted_frames = sorted(frame_counter.items(), key=lambda x: x[1], reverse=True)
99
 
100
  for i, (frame, count) in enumerate(sorted_frames):
101
- if i == 0: # Highest frequency frame
102
  categorized_frames["Major Focus"].append(frame)
103
- elif i < 3: # Top 3 most mentioned frames
104
  categorized_frames["Significant Focus"].append(frame)
105
  else:
106
  categorized_frames["Minor Mention"].append(frame)
@@ -143,39 +143,40 @@ def extract_metadata_from_excel(excel_file):
143
  logging.error(f"Error processing Excel file: {e}")
144
  return []
145
 
146
- # Merge metadata with generated analysis
147
- def merge_metadata_with_generated_data(generated_data, excel_metadata):
148
- for post_data in excel_metadata:
149
- post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
150
- if post_number in generated_data:
151
- generated_data[post_number].update(post_data)
152
- else:
153
- generated_data[post_number] = post_data
154
- return generated_data
155
-
156
- # Create DOCX file matching the uploaded format
157
  def create_docx_from_data(extracted_data):
158
  doc = Document()
159
 
160
- for post_number, data in extracted_data.items():
161
- doc.add_heading(post_number, level=1)
162
 
163
- ordered_keys = [
164
- "Post Number", "Date of Post", "Media Type", "Number of Pictures",
165
- "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
166
- "Full Caption", "Language", "Tone", "Hashtags", "Frames"
167
  ]
168
 
169
- for key in ordered_keys:
170
- value = data.get(key, "N/A")
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- if key in ["Tone", "Hashtags"]:
173
- value = ", ".join(value) if isinstance(value, list) else value
174
- elif key == "Frames" and isinstance(value, dict):
175
- frame_text = "\n".join([f" {category}: {', '.join(frames)}" for category, frames in value.items() if frames])
176
- value = f"\n{frame_text}" if frame_text else "N/A"
177
 
178
- doc.add_paragraph(f"**{key}:** {value}")
 
 
179
 
180
  doc.add_paragraph("\n")
181
 
@@ -190,31 +191,32 @@ input_text = st.text_area("Input Text", height=200)
190
  uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
191
  uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
192
 
193
- output_data = {}
 
 
 
194
 
195
  if input_text:
196
- output_data["Manual Input"] = {
197
  "Full Caption": input_text,
198
  "Language": detect_language(input_text),
199
  "Tone": extract_tone(input_text),
200
  "Hashtags": extract_hashtags(input_text),
201
  "Frames": extract_frames_fallback(input_text),
202
  }
 
203
 
204
  if uploaded_docx:
205
  captions = extract_captions_from_docx(uploaded_docx)
206
  for caption, text in captions.items():
207
- output_data[caption] = {
208
  "Full Caption": text,
209
  "Language": detect_language(text),
210
  "Tone": extract_tone(text),
211
  "Hashtags": extract_hashtags(text),
212
  "Frames": extract_frames_fallback(text),
213
  }
214
-
215
- if uploaded_excel:
216
- excel_metadata = extract_metadata_from_excel(uploaded_excel)
217
- output_data = merge_metadata_with_generated_data(output_data, excel_metadata)
218
 
219
  if output_data:
220
  docx_output = create_docx_from_data(output_data)
@@ -222,4 +224,3 @@ if output_data:
222
  docx_output.save(docx_io)
223
  docx_io.seek(0)
224
  st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")
225
-
 
98
  sorted_frames = sorted(frame_counter.items(), key=lambda x: x[1], reverse=True)
99
 
100
  for i, (frame, count) in enumerate(sorted_frames):
101
+ if i == 0:
102
  categorized_frames["Major Focus"].append(frame)
103
+ elif i < 3:
104
  categorized_frames["Significant Focus"].append(frame)
105
  else:
106
  categorized_frames["Minor Mention"].append(frame)
 
143
  logging.error(f"Error processing Excel file: {e}")
144
  return []
145
 
146
+ # Create DOCX file in the required format
 
 
 
 
 
 
 
 
 
 
147
  def create_docx_from_data(extracted_data):
148
  doc = Document()
149
 
150
+ for index, data in enumerate(extracted_data, start=1):
151
+ doc.add_heading(f"Sr No {index}:", level=1)
152
 
153
+ metadata_fields = [
154
+ "Date of Post", "Media Type", "Number of Pictures", "Number of Videos",
155
+ "Number of Audios", "Likes", "Comments", "Tagged Audience"
 
156
  ]
157
 
158
+ for field in metadata_fields:
159
+ value = data.get(field, "N/A")
160
+ doc.add_paragraph(f"**{field}:** {value}")
161
+
162
+ caption_text = data.get("Full Caption", "N/A")
163
+ doc.add_paragraph(f"**Caption:** {caption_text}")
164
+
165
+ language = data.get("Language", "N/A")
166
+ doc.add_paragraph(f"**Language:** {language}")
167
+
168
+ tone = ", ".join(data.get("Tone", ["N/A"]))
169
+ doc.add_paragraph(f"**Tone:** {tone}")
170
+
171
+ hashtags = ", ".join(data.get("Hashtags", []))
172
+ doc.add_paragraph(f"**Hashtags:** {hashtags}")
173
 
174
+ frames = data.get("Frames", {})
175
+ doc.add_paragraph("**Frames:**")
 
 
 
176
 
177
+ for category, frame_list in frames.items():
178
+ if frame_list:
179
+ doc.add_paragraph(f" {category}: {', '.join(frame_list)}")
180
 
181
  doc.add_paragraph("\n")
182
 
 
191
  uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
192
  uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
193
 
194
+ output_data = []
195
+
196
+ if uploaded_excel:
197
+ output_data = extract_metadata_from_excel(uploaded_excel)
198
 
199
  if input_text:
200
+ text_analysis = {
201
  "Full Caption": input_text,
202
  "Language": detect_language(input_text),
203
  "Tone": extract_tone(input_text),
204
  "Hashtags": extract_hashtags(input_text),
205
  "Frames": extract_frames_fallback(input_text),
206
  }
207
+ output_data.append(text_analysis)
208
 
209
  if uploaded_docx:
210
  captions = extract_captions_from_docx(uploaded_docx)
211
  for caption, text in captions.items():
212
+ text_analysis = {
213
  "Full Caption": text,
214
  "Language": detect_language(text),
215
  "Tone": extract_tone(text),
216
  "Hashtags": extract_hashtags(text),
217
  "Frames": extract_frames_fallback(text),
218
  }
219
+ output_data.append(text_analysis)
 
 
 
220
 
221
  if output_data:
222
  docx_output = create_docx_from_data(output_data)
 
224
  docx_output.save(docx_io)
225
  docx_io.seek(0)
226
  st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")