ahm14 committed on
Commit
34d7c10
·
verified ·
1 Parent(s): 773ca30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -81
app.py CHANGED
@@ -1,15 +1,27 @@
1
  import streamlit as st
2
  import re
3
- from langdetect import detect
4
- from transformers import pipeline
5
  import nltk
6
  from docx import Document
7
  import io
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Download required NLTK resources
10
- nltk.download('punkt')
11
 
12
- # Tone categories
13
  tone_categories = {
14
  "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
15
  "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
@@ -17,14 +29,13 @@ tone_categories = {
17
  "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
18
  "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
19
  "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
20
- "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
21
  "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
22
  "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
23
  "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
24
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
25
  }
26
 
27
- # Frame categories
28
  frame_categories = {
29
  "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
30
  "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
@@ -47,47 +58,56 @@ frame_categories = {
47
  def detect_language(text):
48
  try:
49
  return detect(text)
50
- except:
 
51
  return "unknown"
52
 
53
- # Extract tone
54
  def extract_tone(text):
55
- detected_tones = []
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  for category, keywords in tone_categories.items():
57
- if any(keyword in text.lower() for keyword in keywords):
58
- detected_tones.append(category)
59
- return detected_tones if detected_tones else ["Neutral"]
60
-
61
- # Categorize frames based on importance
62
- def categorize_frame_importance(text, keywords):
63
- keyword_count = sum(text.lower().count(keyword) for keyword in keywords)
64
- if keyword_count > 2:
65
- return "Major Focus"
66
- elif keyword_count == 1 or keyword_count == 2:
67
- return "Significant Focus"
68
- else:
69
- return "Minor Mention"
70
-
71
- # Extract frames with categorization
72
- def extract_frames(text):
73
- detected_frames = {}
74
- for category, keywords in frame_categories.items():
75
- importance = categorize_frame_importance(text, keywords)
76
- if importance != "Minor Mention":
77
- detected_frames[category] = importance
78
-
79
- if not detected_frames:
80
- frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
81
- model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
82
- for label in model_result["labels"][:2]: # Top 2 frames
83
- detected_frames[label] = "Significant Focus"
84
-
85
- return detected_frames
86
 
87
  # Extract hashtags
88
  def extract_hashtags(text):
89
  return re.findall(r"#\w+", text)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # Extract captions from DOCX
92
  def extract_captions_from_docx(docx_file):
93
  doc = Document(docx_file)
@@ -100,35 +120,10 @@ def extract_captions_from_docx(docx_file):
100
  captions[current_post] = []
101
  elif current_post:
102
  captions[current_post].append(text)
103
-
104
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
105
 
106
- # Generate a DOCX file
107
- def generate_docx(output_data):
108
- doc = Document()
109
- doc.add_heading('Activism Message Analysis', 0)
110
-
111
- for index, (caption, result) in enumerate(output_data.items(), start=1):
112
- doc.add_heading(f"{index}. {caption}", level=1)
113
- doc.add_paragraph("Full Caption:")
114
- doc.add_paragraph(result['Full Caption'], style="Quote")
115
-
116
- doc.add_paragraph(f"Language: {result['Language']}")
117
- doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
118
- doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
119
- doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
120
-
121
- doc.add_heading('Frames:', level=2)
122
- for frame, importance in result['Frames'].items():
123
- doc.add_paragraph(f"{frame}: {importance}")
124
-
125
- doc_io = io.BytesIO()
126
- doc.save(doc_io)
127
- doc_io.seek(0)
128
- return doc_io
129
-
130
- # Streamlit UI
131
- st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
132
 
133
  st.write("Enter text or upload a DOCX file for analysis:")
134
 
@@ -138,32 +133,31 @@ input_text = st.text_area("Input Text", height=200)
138
  # File upload
139
  uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
140
 
 
141
  output_data = {}
142
 
143
  if input_text:
144
  output_data["Manual Input"] = {
145
- 'Full Caption': input_text,
146
- 'Language': detect_language(input_text),
147
- 'Tone of Caption': extract_tone(input_text),
148
- 'Hashtags': extract_hashtags(input_text),
149
- 'Hashtag Count': len(extract_hashtags(input_text)),
150
- 'Frames': extract_frames(input_text)
151
  }
152
- st.success("Text analysis completed.")
153
 
154
  if uploaded_file:
155
  captions = extract_captions_from_docx(uploaded_file)
156
  for caption, text in captions.items():
157
  output_data[caption] = {
158
- 'Full Caption': text,
159
- 'Language': detect_language(text),
160
- 'Tone of Caption': extract_tone(text),
161
- 'Hashtags': extract_hashtags(text),
162
- 'Hashtag Count': len(extract_hashtags(text)),
163
- 'Frames': extract_frames(text)
164
  }
165
- st.success("DOCX file analysis completed.")
166
 
 
167
  if output_data:
168
- docx_file = generate_docx(output_data)
169
- st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")
 
import io
import logging
import os
import re

import nltk
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from groq import ChatGroq
from langdetect import detect
from transformers import pipeline
11
+
# Load environment variables (e.g. GROQ_API_KEY) from a local .env file, if present
load_dotenv()

# Initialize logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Initialize LLM (Groq API).
# The credential is read from the environment populated by load_dotenv().
# Previously the literal string "GROQ_API_KEY" was passed as the key itself,
# so no request could ever authenticate.
# NOTE(review): ChatGroq with these keyword arguments looks like the
# langchain_groq class rather than something exported by `groq` - confirm
# the import at the top of the file.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    logging.warning("GROQ_API_KEY is not set; Groq API requests will fail.")

llm = ChatGroq(
    temperature=0.5,
    groq_api_key=groq_api_key,
    model_name="llama3-8b-8192",
)
20
 
21
  # Download required NLTK resources
22
+ nltk.download("punkt")
23
 
24
+ # Tone categories for fallback method
25
  tone_categories = {
26
  "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
27
  "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
 
29
  "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
30
  "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
31
  "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
 
32
  "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
33
  "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
34
  "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
35
  "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
36
  }
37
 
38
+ # Frame categories for fallback method
39
  frame_categories = {
40
  "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
41
  "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
 
def detect_language(text):
    """Return the language reported by ``detect`` for *text*, or "unknown".

    Args:
        text: The caption to inspect. May be empty or ``None``.

    Returns:
        Whatever ``detect(text)`` returns, or the string "unknown" when the
        text is blank or detection raises.
    """
    # Blank input carries no signal; answer directly instead of letting the
    # detector raise and logging an error for an expected situation.
    if not text or not text.strip():
        return "unknown"
    try:
        return detect(text)
    except Exception as e:
        # Best-effort: detection problems must not abort the whole analysis.
        logging.error(f"Error detecting language: {e}")
        return "unknown"
64
 
65
+ # Extract tone using Groq API (or fallback method)
66
  def extract_tone(text):
67
+ try:
68
+ response = llm.chat([
69
+ {"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
70
+ {"role": "user", "content": text}
71
+ ])
72
+ return response["choices"][0]["message"]["content"].split(", ")
73
+ except Exception as e:
74
+ logging.error(f"Groq API error: {e}")
75
+ return extract_tone_fallback(text)
76
+
77
+ # Fallback method for tone extraction
def extract_tone_fallback(text, categories=None):
    """Detect tone categories in *text* by keyword lookup.

    Args:
        text: The caption to analyze. ``None`` is treated as empty text.
        categories: Optional mapping of category name to a list of keywords.
            Defaults to the module-level ``tone_categories``.

    Returns:
        The matching category names in the mapping's own order, or
        ``["Neutral"]`` when nothing matches.
    """
    if categories is None:
        categories = tone_categories
    text_lower = (text or "").lower()

    def _mentions(keyword):
        # Anchor the keyword at the start of a word: "corrupt" still matches
        # "corruption", but "rage" no longer fires inside "courage".
        return re.search(r"(?<!\w)" + re.escape(keyword.lower()), text_lower) is not None

    # A list keeps the result order stable; the previous set did not.
    detected_tones = [
        category
        for category, keywords in categories.items()
        if any(_mentions(word) for word in keywords)
    ]
    return detected_tones if detected_tones else ["Neutral"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  # Extract hashtags
def extract_hashtags(text):
    """Return every ``#word`` token in *text*, in order of appearance.

    Args:
        text: The caption to scan. May be empty or ``None``.

    Returns:
        A list of hashtag strings including the leading ``#``; an empty list
        when there are none or the text is empty/``None``.
    """
    if not text:
        return []
    return re.findall(r"#\w+", text)
89
 
90
+ # Extract frames using Groq API (or fallback)
def extract_frames(text):
    """Classify *text* into activism frames.

    The Groq-backed model is asked first; if the call fails or returns an
    empty reply, the keyword-based ``extract_frames_fallback`` is used.

    Args:
        text: The caption to analyze.

    Returns:
        The model's reply as a string when the call succeeds, otherwise
        whatever ``extract_frames_fallback`` returns. Callers therefore have
        to cope with both result shapes.
    """
    # Nothing to send to the model for blank input; go straight to the fallback.
    if not text or not text.strip():
        return extract_frames_fallback(text or "")
    try:
        # NOTE(review): assumes `llm` is a LangChain chat model (ChatGroq),
        # whose entry point is `invoke` and whose reply exposes `.content`.
        # The earlier `llm.chat(...)` / `response["choices"]` form does not
        # exist on that class, so every call ended in the fallback.
        response = llm.invoke([
            {"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
            {"role": "user", "content": text},
        ])
        content = response.content
        # An empty reply is not a usable classification; use the fallback.
        if content and content.strip():
            return content
    except Exception as e:
        logging.error(f"Groq API error: {e}")
    return extract_frames_fallback(text)
101
+
102
+ # Fallback method for frame extraction
def extract_frames_fallback(text, categories=None):
    """Detect frame categories in *text* by keyword lookup.

    Args:
        text: The caption to analyze. ``None`` is treated as empty text.
        categories: Optional mapping of frame name to a list of keywords.
            Defaults to the module-level ``frame_categories``.

    Returns:
        The matching frame names in the mapping's own order; an empty list
        when nothing matches.
    """
    if categories is None:
        categories = frame_categories
    text_lower = (text or "").lower()

    def _mentions(keyword):
        # Anchor the keyword at the start of a word: "right" still matches
        # "rights", but "law" no longer fires inside "flaw" nor "unity"
        # inside "community".
        return re.search(r"(?<!\w)" + re.escape(keyword.lower()), text_lower) is not None

    # A list keeps the result order stable; the previous set did not.
    return [
        category
        for category, keywords in categories.items()
        if any(_mentions(word) for word in keywords)
    ]
110
+
111
  # Extract captions from DOCX
112
  def extract_captions_from_docx(docx_file):
113
  doc = Document(docx_file)
 
120
  captions[current_post] = []
121
  elif current_post:
122
  captions[current_post].append(text)
 
123
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
124
 
125
+ # Streamlit app
126
+ st.title("AI-Powered Activism Message Analyzer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  st.write("Enter text or upload a DOCX file for analysis:")
129
 
 
133
  # File upload
134
  uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
135
 
# Initialize output dictionary
output_data = {}


def _analyze_caption(caption_text):
    """Run every analyzer on one caption and collect the results in a dict."""
    return {
        "Full Caption": caption_text,
        "Language": detect_language(caption_text),
        "Tone": extract_tone(caption_text),
        "Hashtags": extract_hashtags(caption_text),
        "Frames": extract_frames(caption_text),
    }


# Text typed into the text area is reported under a fixed "Manual Input" key.
if input_text:
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Analysis completed for text input.")

# Each caption extracted from the uploaded DOCX gets its own entry.
if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = _analyze_caption(text)
    st.success(f"Analysis completed for {len(captions)} posts.")

# Display results
if output_data:
    st.write(output_data)