ahm14 committed
Commit 3b8e826 · verified · 1 Parent(s): 409aff1

Update app.py

Files changed (1)
  1. app.py +51 -27
app.py CHANGED

@@ -14,6 +14,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from transformers import pipeline
 
+
 # Load environment variables
 load_dotenv()
 
@@ -32,6 +33,20 @@ llm = ChatGroq(temperature=0.5, groq_api_key=GROQ_API_KEY, model_name="llama3-8b
 # Download required NLTK resources
 nltk.download("punkt")
 
+# Tone categories for fallback method
+tone_categories = {
+    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
+    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
+    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
+    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
+    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
+    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
+    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
+    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
+    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
+    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
+}
+
 # Frame categories with keywords
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
@@ -52,7 +67,7 @@ def detect_language(text):
         logging.error(f"Error detecting language: {e}")
         return "unknown"
 
-# Extract tone using Groq API
+# Extract tone using Groq API (or fallback method)
 def extract_tone(text):
     try:
         response = llm.chat([{"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
@@ -60,7 +75,16 @@ def extract_tone(text):
         return response["choices"][0]["message"]["content"].split(", ")
     except Exception as e:
         logging.error(f"Groq API error: {e}")
-        return ["Neutral"]
+        return extract_tone_fallback(text)
+
+# Fallback method for tone extraction
+def extract_tone_fallback(text):
+    detected_tones = set()
+    text_lower = text.lower()
+    for category, keywords in tone_categories.items():
+        if any(word in text_lower for word in keywords):
+            detected_tones.add(category)
+    return list(detected_tones) if detected_tones else ["Neutral"]
 
 # Extract hashtags
 def extract_hashtags(text):
@@ -113,25 +137,7 @@ def extract_captions_from_docx(docx_file):
 def extract_metadata_from_excel(excel_file):
     try:
         df = pd.read_excel(excel_file)
-        required_columns = ["Date", "Media Type", "Number of Pictures", "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience"]
-        if not all(col in df.columns for col in required_columns):
-            st.error("Excel file is missing required columns.")
-            return []
-
-        extracted_data = []
-        for index, row in df.iterrows():
-            post_data = {
-                "Post Number": f"Post {index + 1}",
-                "Date of Post": row.get("Date", "N/A"),
-                "Media Type": row.get("Media Type", "N/A"),
-                "Number of Pictures": row.get("Number of Pictures", 0),
-                "Number of Videos": row.get("Number of Videos", 0),
-                "Number of Audios": row.get("Number of Audios", 0),
-                "Likes": row.get("Likes", 0),
-                "Comments": row.get("Comments", 0),
-                "Tagged Audience": row.get("Tagged Audience", "No"),
-            }
-            extracted_data.append(post_data)
+        extracted_data = df.to_dict(orient="records")
         return extracted_data
     except Exception as e:
         logging.error(f"Error processing Excel file: {e}")
@@ -140,22 +146,39 @@ def extract_metadata_from_excel(excel_file):
 # Merge metadata with generated analysis
 def merge_metadata_with_generated_data(generated_data, excel_metadata):
     for post_data in excel_metadata:
-        post_number = post_data["Post Number"]
+        post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
         if post_number in generated_data:
             generated_data[post_number].update(post_data)
         else:
-            generated_data[post_number] = post_data  # Preserve metadata even if no text caption
-
+            generated_data[post_number] = post_data
     return generated_data
 
-# Create DOCX file from extracted data
+# Create DOCX file matching the uploaded format
def create_docx_from_data(extracted_data):
     doc = Document()
+
     for post_number, data in extracted_data.items():
         doc.add_heading(post_number, level=1)
-        for key, value in data.items():
+
+        ordered_keys = [
+            "Post Number", "Date of Post", "Media Type", "Number of Pictures",
+            "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
+            "Full Caption", "Language", "Tone", "Hashtags", "Frames"
+        ]
+
+        for key in ordered_keys:
+            value = data.get(key, "N/A")
+
+            if key in ["Tone", "Hashtags"]:
+                value = ", ".join(value) if isinstance(value, list) else value
+            elif key == "Frames" and isinstance(value, dict):
+                frame_text = "\n".join([f"  {category}: {', '.join(frames)}" for category, frames in value.items() if frames])
+                value = f"\n{frame_text}" if frame_text else "N/A"
+
             doc.add_paragraph(f"**{key}:** {value}")
-        doc.add_paragraph("\n")
+
+        doc.add_paragraph("\n")
+
     return doc
 
 # Streamlit app
@@ -199,3 +222,4 @@ if output_data:
     docx_output.save(docx_io)
     docx_io.seek(0)
     st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")
+
 
 
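When the Groq call throws, extract_tone now falls back to keyword matching against tone_categories instead of returning a flat ["Neutral"]. A minimal, self-contained sketch of that fallback behaviour; the keyword dict is trimmed and the sample captions are invented for illustration:

# Trimmed-down copy of tone_categories, just for the demo.
tone_categories = {
    "Emotional": ["urgent", "violence", "crisis"],
    "Informative": ["announcement", "update", "statement"],
    "Hopeful": ["optimism", "better future", "confidence"],
}

def extract_tone_fallback(text):
    detected_tones = set()
    text_lower = text.lower()
    for category, keywords in tone_categories.items():
        # Plain substring match, so "updates" also triggers "update".
        if any(word in text_lower for word in keywords):
            detected_tones.add(category)
    return list(detected_tones) if detected_tones else ["Neutral"]

print(extract_tone_fallback("Urgent update on the crisis"))  # ['Emotional', 'Informative'] in some order
print(extract_tone_fallback("Just a photo of the venue"))    # ['Neutral']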
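extract_metadata_from_excel now returns df.to_dict(orient="records") directly, so each spreadsheet row becomes a plain dict keyed by whatever columns the sheet actually has, and the old required_columns check is gone. A quick sketch of the resulting shape, using made-up rows with a few of the column names the old validation expected:

import pandas as pd

# Invented sample rows; column names mirror part of the old required_columns list.
df = pd.DataFrame({
    "Date": ["2024-01-05", "2024-01-09"],
    "Media Type": ["Image", "Video"],
    "Likes": [120, 45],
    "Comments": [8, 3],
})

records = df.to_dict(orient="records")
print(records[0])  # {'Date': '2024-01-05', 'Media Type': 'Image', 'Likes': 120, 'Comments': 8}

One consequence of dropping the validation: a sheet with missing or renamed columns no longer raises an error in Streamlit; the absent fields simply surface later as "N/A" values in the DOCX output.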
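Because the Excel rows no longer carry a prebuilt "Post N" label, merge_metadata_with_generated_data now derives the key itself: it uses a 'Post Number' column when one exists and otherwise falls back to len(generated_data) + 1. A small sketch with invented data (the function body is the one from the diff):

def merge_metadata_with_generated_data(generated_data, excel_metadata):
    for post_data in excel_metadata:
        # Prefer the sheet's own "Post Number"; otherwise continue the running count.
        post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
        if post_number in generated_data:
            generated_data[post_number].update(post_data)
        else:
            generated_data[post_number] = post_data
    return generated_data

generated = {"Post 1": {"Full Caption": "First caption", "Tone": ["Hopeful"]}}
metadata = [
    {"Post Number": 1, "Likes": 120},  # merges into the existing "Post 1" entry
    {"Likes": 45},                     # no Post Number column, so it becomes "Post 2"
]

merged = merge_metadata_with_generated_data(generated, metadata)
print(sorted(merged))             # ['Post 1', 'Post 2']
print(merged["Post 1"]["Likes"])  # 120

The fallback label just continues from however many posts already exist, so it assumes metadata rows arrive in post order.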
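create_docx_from_data now writes fields in a fixed order and flattens list and dict values before each paragraph: Tone and Hashtags lists are joined with commas, and the Frames dict becomes an indented per-category block with empty categories skipped. A sketch of just that value-formatting step, printed instead of written with python-docx; the post record and the shortened key list are invented for the demo:

ordered_keys = ["Post Number", "Date of Post", "Likes", "Full Caption", "Tone", "Hashtags", "Frames"]

post = {
    "Post Number": 1,
    "Full Caption": "March announced for Friday",
    "Tone": ["Informative", "Motivational"],
    "Hashtags": ["#march", "#friday"],
    "Frames": {"Human Rights & Justice": ["rights", "law"], "Economy": []},
}

for key in ordered_keys:
    value = post.get(key, "N/A")  # fields missing from the record fall back to "N/A"
    if key in ["Tone", "Hashtags"]:
        value = ", ".join(value) if isinstance(value, list) else value
    elif key == "Frames" and isinstance(value, dict):
        frame_text = "\n".join(f"  {category}: {', '.join(frames)}"
                               for category, frames in value.items() if frames)
        value = f"\n{frame_text}" if frame_text else "N/A"
    print(f"**{key}:** {value}")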
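The download step at the end of the script (unchanged in this commit apart from the trailing newline) saves the Document into an in-memory buffer before handing it to st.download_button. A minimal sketch of that pattern, assuming python-docx and Streamlit are installed; the demo document content is invented:

import io
from docx import Document
import streamlit as st

# Throwaway document, just to demonstrate the buffer round-trip.
docx_output = Document()
docx_output.add_heading("Post 1", level=1)
docx_output.add_paragraph("**Likes:** 120")

docx_io = io.BytesIO()
docx_output.save(docx_io)  # python-docx can save into any file-like object
docx_io.seek(0)            # rewind so the download serves the file from the start

st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")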