Update app.py
app.py
CHANGED
```diff
@@ -14,6 +14,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from transformers import pipeline
 
+
 # Load environment variables
 load_dotenv()
 
@@ -32,6 +33,20 @@ llm = ChatGroq(temperature=0.5, groq_api_key=GROQ_API_KEY, model_name="llama3-8b
 # Download required NLTK resources
 nltk.download("punkt")
 
+# Tone categories for fallback method
+tone_categories = {
+    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
+    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
+    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
+    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
+    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
+    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
+    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
+    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
+    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
+    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
+}
+
 # Frame categories with keywords
 frame_categories = {
     "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
@@ -52,7 +67,7 @@ def detect_language(text):
         logging.error(f"Error detecting language: {e}")
         return "unknown"
 
-# Extract tone using Groq API
+# Extract tone using Groq API (or fallback method)
 def extract_tone(text):
     try:
         response = llm.chat([{"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
@@ -60,7 +75,16 @@ def extract_tone(text):
         return response["choices"][0]["message"]["content"].split(", ")
     except Exception as e:
         logging.error(f"Groq API error: {e}")
-        return
+        return extract_tone_fallback(text)
+
+# Fallback method for tone extraction
+def extract_tone_fallback(text):
+    detected_tones = set()
+    text_lower = text.lower()
+    for category, keywords in tone_categories.items():
+        if any(word in text_lower for word in keywords):
+            detected_tones.add(category)
+    return list(detected_tones) if detected_tones else ["Neutral"]
 
 # Extract hashtags
 def extract_hashtags(text):
@@ -113,25 +137,7 @@ def extract_captions_from_docx(docx_file):
 def extract_metadata_from_excel(excel_file):
     try:
         df = pd.read_excel(excel_file)
-
-        if not all(col in df.columns for col in required_columns):
-            st.error("Excel file is missing required columns.")
-            return []
-
-        extracted_data = []
-        for index, row in df.iterrows():
-            post_data = {
-                "Post Number": f"Post {index + 1}",
-                "Date of Post": row.get("Date", "N/A"),
-                "Media Type": row.get("Media Type", "N/A"),
-                "Number of Pictures": row.get("Number of Pictures", 0),
-                "Number of Videos": row.get("Number of Videos", 0),
-                "Number of Audios": row.get("Number of Audios", 0),
-                "Likes": row.get("Likes", 0),
-                "Comments": row.get("Comments", 0),
-                "Tagged Audience": row.get("Tagged Audience", "No"),
-            }
-            extracted_data.append(post_data)
+        extracted_data = df.to_dict(orient="records")
         return extracted_data
     except Exception as e:
         logging.error(f"Error processing Excel file: {e}")
@@ -140,22 +146,39 @@ def extract_metadata_from_excel(excel_file):
 # Merge metadata with generated analysis
 def merge_metadata_with_generated_data(generated_data, excel_metadata):
     for post_data in excel_metadata:
-        post_number =
+        post_number = f"Post {post_data.get('Post Number', len(generated_data) + 1)}"
         if post_number in generated_data:
             generated_data[post_number].update(post_data)
         else:
-            generated_data[post_number] = post_data
-
+            generated_data[post_number] = post_data
     return generated_data
 
-# Create DOCX file
+# Create DOCX file matching the uploaded format
def create_docx_from_data(extracted_data):
     doc = Document()
+
     for post_number, data in extracted_data.items():
         doc.add_heading(post_number, level=1)
-
+
+        ordered_keys = [
+            "Post Number", "Date of Post", "Media Type", "Number of Pictures",
+            "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
+            "Full Caption", "Language", "Tone", "Hashtags", "Frames"
+        ]
+
+        for key in ordered_keys:
+            value = data.get(key, "N/A")
+
+            if key in ["Tone", "Hashtags"]:
+                value = ", ".join(value) if isinstance(value, list) else value
+            elif key == "Frames" and isinstance(value, dict):
+                frame_text = "\n".join([f"  {category}: {', '.join(frames)}" for category, frames in value.items() if frames])
+                value = f"\n{frame_text}" if frame_text else "N/A"
+
             doc.add_paragraph(f"**{key}:** {value}")
+
+        doc.add_paragraph("\n")
+
     return doc
 
 # Streamlit app
@@ -199,3 +222,4 @@ if output_data:
     docx_output.save(docx_io)
     docx_io.seek(0)
     st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="merged_analysis.docx")
+
```
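The keyword fallback added above is easy to sanity-check in isolation. A minimal sketch, assuming `tone_categories` and `extract_tone_fallback` exactly as committed; the sample caption is illustrative only:

```python
# Illustrative input, not from the repo: exercise the fallback path directly.
sample = "Urgent concern over forced disappearances; protest scheduled for Friday."
print(extract_tone_fallback(sample))
# "urgent"/"concern"/"forced"/"disappearances" hit Emotional, and
# "protest"/"scheduled" hit Informative; the backing set gives no stable
# order, e.g. ['Informative', 'Emotional']
```

Note that `word in text_lower` is substring matching, so "rage" also fires inside "coverage"; if that ever matters, word-boundary matching (e.g. `re.search(rf"\b{re.escape(word)}\b", text_lower)`) is the usual fix.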
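One caveat on the primary path: `extract_tone` still calls `llm.chat([...])` and indexes the result like a raw completions payload, but `ChatGroq` from `langchain_groq` is normally driven through `.invoke()`, which returns a message object. If `.chat()` is not available in the installed version, every call will raise and land in the new fallback. A hedged sketch of an `.invoke()`-based variant (the function name is mine, not from the commit):

```python
from langchain_core.messages import HumanMessage, SystemMessage

def extract_tone_via_groq(text):
    # Sketch, assuming `llm` is the module-level ChatGroq instance:
    # .invoke() returns an AIMessage whose text lives on .content,
    # not a {"choices": [...]} dict.
    response = llm.invoke([
        SystemMessage(content="Analyze the tone of the following text "
                              "and provide descriptive tone labels."),
        HumanMessage(content=text),
    ])
    return [label.strip() for label in response.content.split(",")]
```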
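The Excel reader is also simpler now: `df.to_dict(orient="records")` yields one dict per row, keyed by the sheet's column headers, and the old per-column defaults move to the `data.get(key, "N/A")` lookups in `create_docx_from_data`. For reference, the shape pandas produces:

```python
import pandas as pd

# Toy frame standing in for the uploaded sheet.
df = pd.DataFrame({"Date": ["2025-01-01"], "Media Type": ["Image"], "Likes": [12]})
print(df.to_dict(orient="records"))
# [{'Date': '2025-01-01', 'Media Type': 'Image', 'Likes': 12}]
```

One consequence worth knowing: rows no longer carry a synthesized "Post Number" key, so the merge step's `post_data.get('Post Number', len(generated_data) + 1)` default now does the numbering instead.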
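Last, a note on the DOCX writer: python-docx does not interpret Markdown, so `doc.add_paragraph(f"**{key}:** {value}")` writes the asterisks literally into the document. If real bold labels are the intent, a small sketch using bold runs (helper name is mine):

```python
from docx import Document

def add_labeled_line(doc, key, value):
    # python-docx renders emphasis through runs; "**...**" has no meaning here.
    para = doc.add_paragraph()
    para.add_run(f"{key}: ").bold = True
    para.add_run(str(value))

doc = Document()
add_labeled_line(doc, "Likes", 12)  # "Likes:" in bold, "12" in regular weight
```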