Ozgur Unlu committed
Commit d52122b · 1 Parent(s): 3256b66
Files changed (4)
  1. app.py +197 -0
  2. news_checker.py +73 -0
  3. pdf_generator.py +44 -0
  4. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,197 @@
+ import gradio as gr
+ import torch
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForSequenceClassification,
+     pipeline
+ )
+ from pdf_generator import ReportGenerator
+ from news_checker import NewsChecker
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Initialize models and tokenizers
+ def load_models():
+     # Hate speech detection model
+     hate_tokenizer = AutoTokenizer.from_pretrained("facebook/roberta-hate-speech-dynabench-r4-target")
+     hate_model = AutoModelForSequenceClassification.from_pretrained("facebook/roberta-hate-speech-dynabench-r4-target")
+
+     # Bias detection placeholder: base DistilBERT without a fine-tuned
+     # classification head; loaded here but not yet used by any check below
+     bias_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+     bias_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
+
+     return {
+         'hate_speech': (hate_model, hate_tokenizer),
+         'bias': (bias_model, bias_tokenizer)
+     }
+
+ # Initialize the news checker and load the models once at startup,
+ # so each request does not re-download and re-initialize them
+ news_checker = NewsChecker()
+ models = load_models()
+
+ def check_text_length(text):
+     if len(text) > 1000:
+         return {
+             'status': 'fail',
+             'message': 'Text exceeds 1000 character limit'
+         }
+     return {
+         'status': 'pass',
+         'message': 'Text length is within limits'
+     }
+
+ def check_hate_speech(text, model, tokenizer):
+     try:
+         inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+         outputs = model(**inputs)
+         predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+         # Index 1 is the "hate" class; thresholds are empirical
+         hate_score = predictions[0][1].item()
+         if hate_score > 0.3:
+             return {
+                 'status': 'fail',
+                 'message': 'Potential hate speech detected'
+             }
+         elif hate_score > 0.1:
+             return {
+                 'status': 'warning',
+                 'message': 'Some concerning language detected - please review'
+             }
+         return {
+             'status': 'pass',
+             'message': 'No hate speech detected'
+         }
+     except Exception as e:
+         return {
+             'status': 'error',
+             'message': f'Error in hate speech detection: {str(e)}'
+         }
+
+ def check_grammar(text):
+     try:
+         # The pipeline is re-created on every call, which is slow; if the model id
+         # below is unavailable, a grammar-correction seq2seq model such as
+         # prithivida/grammar_error_correcter_v1 should be a drop-in substitute
+         nlp = pipeline("text2text-generation", model="gramformer/gramformer", device=0 if torch.cuda.is_available() else -1)
+         corrected = nlp(text, max_length=1000)[0]['generated_text']
+
+         if corrected.lower() != text.lower():
+             return {
+                 'status': 'warning',
+                 'message': f'Suggested corrections:\n{corrected}'
+             }
+         return {
+             'status': 'pass',
+             'message': 'No grammar issues detected'
+         }
+     except Exception as e:
+         return {
+             'status': 'error',
+             'message': f'Error in grammar check: {str(e)}'
+         }
+
+ def analyze_content(text):
+     # Initialize report generator
+     report_gen = ReportGenerator()
+     report_gen.add_header()
+     report_gen.add_input_text(text)
+
+     # Run all checks
+     results = {}
+
+     # 1. Length Check
+     length_result = check_text_length(text)
+     results['Length Check'] = length_result
+     report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
+
+     if length_result['status'] == 'fail':
+         report_path = report_gen.save_report()
+         return results, report_path
+
+     # 2. Hate Speech Check
+     hate_result = check_hate_speech(text, models['hate_speech'][0], models['hate_speech'][1])
+     results['Hate Speech Check'] = hate_result
+     report_gen.add_check_result("Hate Speech Check", hate_result['status'], hate_result['message'])
+
+     # 3. Grammar Check
+     grammar_result = check_grammar(text)
+     results['Grammar Check'] = grammar_result
+     report_gen.add_check_result("Grammar Check", grammar_result['status'], grammar_result['message'])
+
+     # 4. News Context Check
+     news_result = news_checker.check_content_against_news(text)
+     results['Current Events Context'] = news_result
+     report_gen.add_check_result("Current Events Context", news_result['status'], news_result['message'])
+
+     # Generate and save report
+     report_path = report_gen.save_report()
+
+     return results, report_path
+
+ def format_results(results):
+     status_symbols = {
+         'pass': '✅',
+         'fail': '❌',
+         'warning': '⚠️',
+         'error': '⚠️'
+     }
+
+     formatted_output = ""
+     for check, result in results.items():
+         symbol = status_symbols.get(result['status'], '❓')
+         formatted_output += f"{check}: {symbol}\n"
+         if result['message']:
+             formatted_output += f"Details: {result['message']}\n\n"
+
+     return formatted_output
+
+ def analyze_and_format(text):
+     # Run the analysis once and return both the formatted summary and the PDF path
+     results, report_path = analyze_content(text)
+     return format_results(results), report_path
+
+ # Gradio Interface
+ def create_interface():
+     with gr.Blocks(title="Marketing Content Validator") as interface:
+         gr.Markdown("# Marketing Content Validator")
+         gr.Markdown("Paste your marketing content below to check for potential issues.")
+
+         with gr.Row():
+             with gr.Column():
+                 input_text = gr.TextArea(
+                     label="Marketing Content",
+                     placeholder="Enter your marketing content here (max 1000 characters)...",
+                     lines=10
+                 )
+                 analyze_btn = gr.Button("Analyze Content")
+
+             with gr.Column():
+                 output_text = gr.TextArea(
+                     label="Analysis Results",
+                     lines=10,
+                     interactive=False
+                 )
+                 report_output = gr.File(label="Download Report")
+
+         analyze_btn.click(
+             fn=analyze_and_format,
+             inputs=input_text,
+             outputs=[output_text, report_output]
+         )
+
+         gr.Markdown("""
+         ### Notes:
+         - Maximum text length: 1000 characters
+         - Analysis may take up to 2 minutes
+         - Results include checks for:
+           - Text length
+           - Hate speech and bias
+           - Grammar
+           - Current events context
+         """)
+
+     return interface
+
+ # Launch the application
+ if __name__ == "__main__":
+     interface = create_interface()
+     interface.launch()
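
For a quick sanity check without launching the Gradio UI, the pipeline can be driven from a Python shell. This is only a usage sketch: it assumes the model downloads succeed and that NEWS_API_KEY is configured (without it, the news check degrades to a warning); the sample copy is made up.

from app import analyze_content, format_results

sample = "Introducing our new eco-friendly water bottle - 20% off this week only!"
results, report_path = analyze_content(sample)

print(format_results(results))              # per-check status summary
print("PDF report saved to:", report_path)
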
news_checker.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ from newsapi import NewsApiClient
+ from dotenv import load_dotenv
+ import pandas as pd
+ from datetime import datetime, timedelta
+
+ load_dotenv()
+
+ class NewsChecker:
+     def __init__(self):
+         self.api_key = os.getenv('NEWS_API_KEY')
+         if not self.api_key:
+             print("NEWS_API_KEY is not set; news checks will fall back to a warning.")
+         self.newsapi = NewsApiClient(api_key=self.api_key)
+
+     def get_recent_news(self, query):
+         try:
+             # Get news from the last 7 days. NewsAPI's /everything endpoint
+             # rejects requests without a query (or sources/domains), so the
+             # caller passes a non-empty q built from the marketing text.
+             week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
+             response = self.newsapi.get_everything(
+                 q=query,
+                 from_param=week_ago,
+                 language='en',
+                 sort_by='relevancy',
+                 page_size=100
+             )
+
+             if response['status'] == 'ok':
+                 articles = response['articles']
+                 # Extract titles and descriptions, skipping articles without a description
+                 news_data = [
+                     {
+                         'title': article['title'],
+                         'description': article['description']
+                     }
+                     for article in articles if article['description']
+                 ]
+                 return pd.DataFrame(news_data)
+             return pd.DataFrame()
+
+         except Exception as e:
+             print(f"Error fetching news: {str(e)}")
+             return pd.DataFrame()
+
+     def check_content_against_news(self, marketing_text):
+         # Build a simple OR query from the longer words in the marketing text
+         # so the search returns articles that are at least loosely related
+         marketing_words = set(marketing_text.lower().split())
+         keywords = [word for word in marketing_words if len(word) > 4][:10]
+         query = ' OR '.join(keywords) if keywords else marketing_text[:100]
+
+         news_df = self.get_recent_news(query)
+         if news_df.empty:
+             return {
+                 'status': 'warning',
+                 'message': 'Unable to check against current news context. Proceed with caution.'
+             }
+
+         # Simple keyword matching for demo purposes
+         # In a production environment, you'd want to use more sophisticated NLP techniques
+         potential_conflicts = []
+
+         for _, row in news_df.iterrows():
+             title_words = set(row['title'].lower().split())
+
+             # Check for significant word overlap with the headline
+             if len(marketing_words.intersection(title_words)) >= 3:
+                 potential_conflicts.append(row['title'])
+
+         if potential_conflicts:
+             return {
+                 'status': 'warning',
+                 'message': 'Potential conflicts found with current news:\n- ' + '\n- '.join(potential_conflicts)
+             }
+
+         return {
+             'status': 'pass',
+             'message': 'No significant conflicts with current news found.'
+         }
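
The news check can also be exercised on its own. A minimal sketch, assuming the module is saved as news_checker.py and NEWS_API_KEY is set (otherwise get_recent_news returns an empty DataFrame and the check falls back to a warning); the sample text is invented.

from news_checker import NewsChecker

checker = NewsChecker()
result = checker.check_content_against_news(
    "Book your summer flights to Europe now - prices drop for a limited time"
)
print(result['status'])    # 'pass', or 'warning' on headline overlap or API problems
print(result['message'])
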
pdf_generator.py ADDED
@@ -0,0 +1,44 @@
+ from fpdf import FPDF
+ from datetime import datetime
+
+ class ReportGenerator:
+     def __init__(self):
+         self.pdf = FPDF()
+         self.pdf.add_page()
+         self.pdf.set_font("Arial", size=12)
+
+     @staticmethod
+     def _latin1(text):
+         # The built-in PDF fonts only cover Latin-1, so replace anything else
+         # (emoji, smart quotes, etc.) instead of raising an encoding error
+         return str(text).encode('latin-1', 'replace').decode('latin-1')
+
+     def add_header(self):
+         self.pdf.set_font("Arial", "B", 16)
+         self.pdf.cell(200, 10, txt="Marketing Content Validation Report", ln=True, align='C')
+         self.pdf.set_font("Arial", size=10)
+         self.pdf.cell(200, 10, txt=f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True, align='R')
+         self.pdf.ln(10)
+
+     def add_input_text(self, text):
+         self.pdf.set_font("Arial", "B", 12)
+         self.pdf.cell(200, 10, txt="Input Marketing Content:", ln=True)
+         self.pdf.set_font("Arial", size=12)
+         self.pdf.multi_cell(0, 10, txt=self._latin1(text))
+         self.pdf.ln(10)
+
+     def add_check_result(self, check_name, status, details=None):
+         # Plain-text status labels are used instead of check/cross symbols,
+         # which the Latin-1 core fonts cannot encode
+         status_labels = {
+             "pass": "PASS",
+             "fail": "FAIL",
+             "warning": "WARNING",
+             "error": "ERROR"
+         }
+
+         self.pdf.set_font("Arial", "B", 12)
+         status_label = status_labels.get(status.lower(), "UNKNOWN")
+         self.pdf.cell(0, 10, txt=f"{check_name}: {status_label}", ln=True)
+
+         if details:
+             self.pdf.set_font("Arial", size=10)
+             self.pdf.multi_cell(0, 10, txt=self._latin1(details))
+         self.pdf.ln(5)
+
+     def save_report(self):
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         filename = f"marketing_report_{timestamp}.pdf"
+         self.pdf.output(filename)
+         return filename
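
ReportGenerator has no dependency on the models or the news API, so it can be tried in isolation. A small sketch with made-up sample strings:

from pdf_generator import ReportGenerator

report = ReportGenerator()
report.add_header()
report.add_input_text("Sample marketing copy used only for this demo.")
report.add_check_result("Length Check", "pass", "Text length is within limits")
report.add_check_result("Grammar Check", "warning", "Suggested corrections: ...")
print(report.save_report())   # e.g. marketing_report_20240101_120000.pdf
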
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio==4.19.2
+ transformers==4.36.2
+ torch==2.2.0
+ newsapi-python==0.2.7
+ fpdf2==2.7.8
+ pandas==2.1.4
+ numpy==1.24.3
+ requests==2.31.0
+ python-dotenv==1.0.0
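
Both app.py and news_checker.py call load_dotenv(), so the NewsAPI key is expected in a .env file (a single line such as NEWS_API_KEY=...) or an exported environment variable. A hypothetical pre-flight helper, not part of this commit:

# check_env.py - hypothetical helper to verify configuration before launching the app
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory, if present
if not os.getenv("NEWS_API_KEY"):
    raise SystemExit("NEWS_API_KEY is not set; the current-events check will only emit warnings.")
print("Environment looks OK - run `python app.py` to launch the interface.")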