Update app.py
app.py
@@ -98,13 +98,26 @@ def extract_frames(text):
         logging.error(f"Groq API error: {e}")
         return extract_frames_fallback(text)
 
-# Fallback method for frame extraction
+# Fallback method for frame extraction (with categorization of Major, Significant, Minor)
 def extract_frames_fallback(text):
     detected_frames = set()
+    frame_focus = {"Major Focus": [], "Significant Focus": [], "Minor Mention": []}
     text_lower = text.lower()
+
     for category, keywords in frame_categories.items():
-
-
+        keyword_count = sum(word in text_lower for word in keywords)
+        if keyword_count > 3:
+            frame_focus["Major Focus"].append(category)
+        elif keyword_count > 1:
+            frame_focus["Significant Focus"].append(category)
+        elif keyword_count > 0:
+            frame_focus["Minor Mention"].append(category)
+
+    # Return categorized frames
+    for focus, categories in frame_focus.items():
+        for category in categories:
+            detected_frames.add(f"{focus}: {category}")
+
     return list(detected_frames)
 
 # Extract captions from DOCX
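The new fallback buckets each frame category by raw keyword hits: more than 3 hits is a Major Focus, 2–3 a Significant Focus, exactly 1 a Minor Mention. A minimal standalone sketch of the same logic, assuming a toy frame_categories dict (the app's real dict is defined earlier in app.py):

# Standalone sketch of the fallback bucketing; frame_categories here is an
# invented example, not the app's real category dict.
frame_categories = {
    "Climate Justice": ["climate", "carbon", "emissions", "warming", "fossil"],
    "Labor Rights": ["union", "wages", "strike", "workers"],
}

def extract_frames_fallback(text):
    detected_frames = set()
    frame_focus = {"Major Focus": [], "Significant Focus": [], "Minor Mention": []}
    text_lower = text.lower()
    for category, keywords in frame_categories.items():
        # sum() over booleans counts how many keywords occur in the text
        keyword_count = sum(word in text_lower for word in keywords)
        if keyword_count > 3:
            frame_focus["Major Focus"].append(category)
        elif keyword_count > 1:
            frame_focus["Significant Focus"].append(category)
        elif keyword_count > 0:
            frame_focus["Minor Mention"].append(category)
    for focus, categories in frame_focus.items():
        for category in categories:
            detected_frames.add(f"{focus}: {category}")
    return list(detected_frames)

print(extract_frames_fallback("Cut carbon emissions now: climate warming is a union issue."))
# ['Major Focus: Climate Justice', 'Minor Mention: Labor Rights'] (set order may vary)

Note that `word in text_lower` is substring containment, so short keywords can over-count (e.g. "art" matches inside "party"); a word-boundary regex would be stricter.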
@@ -121,64 +134,55 @@ def extract_captions_from_docx(docx_file):
             captions[current_post].append(text)
     return {post: " ".join(lines) for post, lines in captions.items() if lines}
 
-#
+# Function to extract metadata from an Excel file
 def extract_metadata_from_excel(excel_file):
     try:
         df = pd.read_excel(excel_file)
-
-
+        # Ensure the required columns are present
+        required_columns = ["Date", "Media Type", "Number of Pictures", "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience"]
+        if not all(col in df.columns for col in required_columns):
+            st.error("Excel file is missing required columns.")
+            return []
+        extracted_data = []
+        for index, row in df.iterrows():
+            post_data = {
+                "Post Number": f"Post {index + 1}",
+                "Date of Post": row.get("Date", "N/A"),
+                "Media Type": row.get("Media Type", "N/A"),
+                "Number of Pictures": row.get("Number of Pictures", 0),
+                "Number of Videos": row.get("Number of Videos", 0),
+                "Number of Audios": row.get("Number of Audios", 0),
+                "Likes": row.get("Likes", 0),
+                "Comments": row.get("Comments", 0),
+                "Tagged Audience": row.get("Tagged Audience", "No"),
+            }
+            extracted_data.append(post_data)
+        return extracted_data
     except Exception as e:
-        logging.error(f"Error
-        return
+        logging.error(f"Error processing Excel file: {e}")
+        return []
 
 # Merge metadata from Excel with the generated data
 def merge_metadata_with_generated_data(generated_data, excel_metadata):
-    for
-
-
+    for post_data in excel_metadata:
+        post_number = post_data["Post Number"]
+        if post_number in generated_data:
+            generated_data[post_number].update(post_data)
+        else:
+            generated_data[post_number] = post_data
     return generated_data
 
-# Function to create
-def
+# Function to create DOCX from extracted data
+def create_docx_from_data(extracted_data):
     doc = Document()
-
-
-
-
-    doc.
-
-    # Adding the details for each post
-    doc.add_paragraph(f"Date of Post: {data.get('Date of Post', 'N/A')}")
-    doc.add_paragraph(f"Media Type: {data.get('Media Type', 'N/A')}")
-    doc.add_paragraph(f"No of Pictures: {data.get('No of Pictures', 0)}")
-    doc.add_paragraph(f"No of Videos: {data.get('No of Videos', 0)}")
-    doc.add_paragraph(f"No of Audios: {data.get('No of Audios', 0)}")
-    doc.add_paragraph(f"Likes: {data.get('Likes', 'N/A')}")
-    doc.add_paragraph(f"Comments: {data.get('Comments', 'N/A')}")
-    doc.add_paragraph(f"Tagged Audience: {data.get('Tagged Audience', 'No')}")
-    doc.add_paragraph(f"Caption: {data.get('Full Caption', 'N/A')}")
-    doc.add_paragraph(f"Language of Caption: {data.get('Language', 'N/A')}")
-    doc.add_paragraph(f"Total No of Hashtags: {len(data.get('Hashtags', []))}")
-
-    if data.get('Hashtags'):
-        doc.add_paragraph(f"Hashtags: {', '.join(data['Hashtags'])}")
-    else:
-        doc.add_paragraph("Hashtags: N/A")
-
-    # Adding Frames for each post
-    doc.add_heading("Frames", level=2)
-    if data.get("Frames"):
-        for frame in data['Frames']:
-            doc.add_paragraph(f"- {frame}")
-    else:
-        doc.add_paragraph("No Frames available")
+    for post_number, data in extracted_data.items():
+        doc.add_heading(post_number, level=1)
+        for key, value in data.items():
+            doc.add_paragraph(f"{key}: {value}")
+        doc.add_paragraph("\n")  # Add a line break between posts
+    return doc
 
-
-
-    # Save the document
-    doc.save(output_path)
-
-# Streamlit app setup
+# Streamlit app
 st.title("AI-Powered Activism Message Analyzer")
 
 st.write("Enter text or upload a DOCX/Excel file for analysis:")
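The extractor now validates a fixed column set and keys each row as "Post {index + 1}", and the merge overlays those rows onto the analyzed captions by that key. A small sketch of both (sample values invented; the merge loop mirrors merge_metadata_with_generated_data, which only lines rows up with captions when both sides use the same "Post N" labels):

import pandas as pd

# Hypothetical one-row sheet matching required_columns; values are invented.
df = pd.DataFrame([{
    "Date": "2024-05-01", "Media Type": "Image",
    "Number of Pictures": 3, "Number of Videos": 0, "Number of Audios": 0,
    "Likes": 120, "Comments": 14, "Tagged Audience": "Yes",
}])
df.to_excel("posts.xlsx", index=False)  # needs openpyxl; first data row becomes "Post 1"

# Merge semantics: matching keys are updated in place, unmatched Excel rows
# are added as new posts with no caption analysis attached.
generated = {"Post 1": {"Full Caption": "Save the wetlands!", "Language": "en"}}
excel_metadata = [
    {"Post Number": "Post 1", "Likes": 120, "Comments": 14},  # updates Post 1
    {"Post Number": "Post 2", "Likes": 8, "Comments": 1},     # added as a new entry
]
for post_data in excel_metadata:
    key = post_data["Post Number"]
    if key in generated:
        generated[key].update(post_data)
    else:
        generated[key] = post_data
assert generated["Post 1"]["Likes"] == 120 and "Post 2" in generated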
@@ -195,18 +199,60 @@ uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
 # Initialize output dictionary
 output_data = {}
 
-#
+# Process Text Input
+if input_text:
+    output_data["Manual Input"] = {
+        "Full Caption": input_text,
+        "Language": detect_language(input_text),
+        "Tone": extract_tone(input_text),
+        "Hashtags": extract_hashtags(input_text),
+        "Frames": extract_frames(input_text),
+    }
+    st.success("Analysis completed for text input.")
+
+# Process DOCX file
 if uploaded_docx:
-
+    captions = extract_captions_from_docx(uploaded_docx)
+    for caption, text in captions.items():
+        output_data[caption] = {
+            "Full Caption": text,
+            "Language": detect_language(text),
+            "Tone": extract_tone(text),
+            "Hashtags": extract_hashtags(text),
+            "Frames": extract_frames(text),
+        }
+    st.success(f"Analysis completed for {len(captions)} posts from DOCX.")
+
+# Process Excel file
+if uploaded_excel:
+    with st.spinner("Processing Excel file..."):
+        excel_metadata = extract_metadata_from_excel(uploaded_excel)
+    if excel_metadata:
+        st.success(f"Excel metadata extracted with {len(excel_metadata)} posts.")
+    else:
+        st.warning("No valid data extracted from the Excel file.")
+
+# Merge and display final data
 if uploaded_excel:
-
-
+    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)
+
+# Display results in collapsible sections for better UI
+if output_data:
+    for post_number, data in output_data.items():
+        with st.expander(post_number):
+            for key, value in data.items():
+                st.write(f"**{key}:** {value}")
 
-#
+# Create DOCX file for download
 if output_data:
-
-
-
-
+    doc = create_docx_from_data(output_data)
+    docx_io = io.BytesIO()
+    doc.save(docx_io)
+    docx_io.seek(0)
 
-
+    st.download_button(
+        label="Download Extracted Data as DOCX",
+        data=docx_io,
+        file_name="extracted_data.docx",
+        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    )
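All three input paths (manual text, DOCX captions, Excel rows) funnel into output_data as one flat dict per post, and that shared shape is what both the st.expander display loop and create_docx_from_data() iterate over. A sketch of that record shape with the analyzer calls stubbed out (detect_language, extract_tone, extract_hashtags, extract_frames live elsewhere in app.py; the stub values are invented):

# Sketch of the per-post record every input path builds; analyzer outputs stubbed.
def analyze(caption: str) -> dict:
    return {
        "Full Caption": caption,
        "Language": "en",                        # detect_language(caption)
        "Tone": ["urgent"],                      # extract_tone(caption)
        "Hashtags": ["#SaveTheWetlands"],        # extract_hashtags(caption)
        "Frames": ["Major Focus: Environment"],  # extract_frames(caption)
    }

output_data = {"Post 1": analyze("Save the wetlands! #SaveTheWetlands")}
# Same key/value walk the expander display and the DOCX export both perform.
for post_number, data in output_data.items():
    for key, value in data.items():
        print(f"{key}: {value}")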
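The download block saves into io.BytesIO instead of a file path, which keeps the export in memory so st.download_button can serve it without touching disk; the seek(0) rewind matters because doc.save() leaves the stream positioned at its end. A minimal runnable sketch of the same pattern outside Streamlit:

# In-memory DOCX round trip used by the download button.
# Requires python-docx (pip install python-docx).
import io
from docx import Document

doc = Document()
doc.add_heading("Post 1", level=1)
doc.add_paragraph("Likes: 120")

docx_io = io.BytesIO()
doc.save(docx_io)   # python-docx accepts any file-like object
docx_io.seek(0)     # rewind so the next reader starts at byte 0

print(len(docx_io.getvalue()), "bytes ready to serve")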