Ozgur Unlu
committed on
Commit
·
5b4acc0
1
Parent(s):
d1f1bbf
fixes spell check and adds max length
Browse files
app.py
CHANGED
@@ -50,28 +50,18 @@ def load_models():
|
|
50 |
# Initialize news checker
|
51 |
news_checker = NewsChecker()
|
52 |
|
53 |
-
def check_text_length(text
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
return {
|
58 |
-
'status': 'fail',
|
59 |
-
'message': 'Maximum length cannot exceed 1000 characters'
|
60 |
-
}
|
61 |
-
if len(text) > max_length:
|
62 |
-
return {
|
63 |
-
'status': 'fail',
|
64 |
-
'message': f'Text exceeds {max_length} character limit'
|
65 |
-
}
|
66 |
-
return {
|
67 |
-
'status': 'pass',
|
68 |
-
'message': 'Text length is within limits'
|
69 |
-
}
|
70 |
-
except ValueError:
|
71 |
return {
|
72 |
'status': 'fail',
|
73 |
-
'message': '
|
74 |
}
|
|
|
|
|
|
|
|
|
75 |
|
76 |
def check_hate_speech_and_bias(text, model, tokenizer):
|
77 |
try:
|
@@ -184,8 +174,8 @@ def check_spelling(text, spell_checker):
|
|
184 |
# Normalize apostrophes in the word
|
185 |
word = normalize_apostrophes(word)
|
186 |
|
187 |
-
# Remove surrounding punctuation but keep internal apostrophes
|
188 |
-
cleaned = re.sub(r'^[^\w\']+|[^\w\']+$', '', word)
|
189 |
|
190 |
# Skip empty strings
|
191 |
if not cleaned:
|
@@ -194,6 +184,18 @@ def check_spelling(text, spell_checker):
|
|
194 |
# Skip if the word is in our contractions list
|
195 |
if cleaned.lower() in CONTRACTIONS:
|
196 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
# Skip special cases
|
199 |
if (cleaned.isdigit() or # Skip numbers
|
@@ -225,7 +227,7 @@ def check_spelling(text, spell_checker):
|
|
225 |
'message': f'Error in spell check: {str(e)}'
|
226 |
}
|
227 |
|
228 |
-
def analyze_content(text
|
229 |
try:
|
230 |
# Initialize report generator
|
231 |
report_gen = ReportGenerator()
|
@@ -239,7 +241,7 @@ def analyze_content(text, max_length):
|
|
239 |
results = {}
|
240 |
|
241 |
# 1. Length Check
|
242 |
-
length_result = check_text_length(text
|
243 |
results['Length Check'] = length_result
|
244 |
report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
|
245 |
|
@@ -306,14 +308,6 @@ def create_interface():
|
|
306 |
|
307 |
with gr.Row():
|
308 |
with gr.Column():
|
309 |
-
max_length = gr.Number(
|
310 |
-
label="Check for max length:",
|
311 |
-
value=500,
|
312 |
-
minimum=1,
|
313 |
-
maximum=1000,
|
314 |
-
step=1,
|
315 |
-
interactive=True
|
316 |
-
)
|
317 |
input_text = gr.TextArea(
|
318 |
label="Marketing Content",
|
319 |
placeholder="Enter your marketing content here (max 1000 characters)...",
|
@@ -330,17 +324,17 @@ def create_interface():
|
|
330 |
report_output = gr.File(label="Download Report")
|
331 |
|
332 |
analyze_btn.click(
|
333 |
-
fn=lambda text
|
334 |
-
format_results(analyze_content(text
|
335 |
-
analyze_content(text
|
336 |
),
|
337 |
-
inputs=
|
338 |
outputs=[output_text, report_output]
|
339 |
)
|
340 |
|
341 |
gr.Markdown("""
|
342 |
### Notes:
|
343 |
-
- Maximum
|
344 |
- Analysis may take up to 2 minutes
|
345 |
- Results include checks for:
|
346 |
- Text length
|
|
|
50 |
# Initialize news checker
|
51 |
news_checker = NewsChecker()
|
52 |
|
53 |
+
def check_text_length(text):
|
54 |
+
"""Check if text length is within the 1000 character limit and return character count"""
|
55 |
+
char_count = len(text)
|
56 |
+
if char_count > 1000:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
return {
|
58 |
'status': 'fail',
|
59 |
+
'message': f'Text length: {char_count}/1000 characters (exceeds maximum limit)'
|
60 |
}
|
61 |
+
return {
|
62 |
+
'status': 'pass',
|
63 |
+
'message': f'Text length: {char_count}/1000 characters'
|
64 |
+
}
|
65 |
|
66 |
def check_hate_speech_and_bias(text, model, tokenizer):
|
67 |
try:
|
|
|
174 |
# Normalize apostrophes in the word
|
175 |
word = normalize_apostrophes(word)
|
176 |
|
177 |
+
# Remove surrounding punctuation but keep internal apostrophes and hyphens
|
178 |
+
cleaned = re.sub(r'^[^\w\'\-]+|[^\w\'\-]+$', '', word)
|
179 |
|
180 |
# Skip empty strings
|
181 |
if not cleaned:
|
|
|
184 |
# Skip if the word is in our contractions list
|
185 |
if cleaned.lower() in CONTRACTIONS:
|
186 |
continue
|
187 |
+
|
188 |
+
# Handle hyphenated words
|
189 |
+
if '-' in cleaned:
|
190 |
+
parts = cleaned.split('-')
|
191 |
+
# Check if each part is valid
|
192 |
+
all_parts_valid = all(
|
193 |
+
part.lower() in spell_checker.word_frequency
|
194 |
+
for part in parts
|
195 |
+
if part # Skip empty parts
|
196 |
+
)
|
197 |
+
if all_parts_valid:
|
198 |
+
continue
|
199 |
|
200 |
# Skip special cases
|
201 |
if (cleaned.isdigit() or # Skip numbers
|
|
|
227 |
'message': f'Error in spell check: {str(e)}'
|
228 |
}
|
229 |
|
230 |
+
def analyze_content(text):
|
231 |
try:
|
232 |
# Initialize report generator
|
233 |
report_gen = ReportGenerator()
|
|
|
241 |
results = {}
|
242 |
|
243 |
# 1. Length Check
|
244 |
+
length_result = check_text_length(text)
|
245 |
results['Length Check'] = length_result
|
246 |
report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
|
247 |
|
|
|
308 |
|
309 |
with gr.Row():
|
310 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
input_text = gr.TextArea(
|
312 |
label="Marketing Content",
|
313 |
placeholder="Enter your marketing content here (max 1000 characters)...",
|
|
|
324 |
report_output = gr.File(label="Download Report")
|
325 |
|
326 |
analyze_btn.click(
|
327 |
+
fn=lambda text: (
|
328 |
+
format_results(analyze_content(text)[0]),
|
329 |
+
analyze_content(text)[1]
|
330 |
),
|
331 |
+
inputs=input_text,
|
332 |
outputs=[output_text, report_output]
|
333 |
)
|
334 |
|
335 |
gr.Markdown("""
|
336 |
### Notes:
|
337 |
+
- Maximum text length: 1000 characters
|
338 |
- Analysis may take up to 2 minutes
|
339 |
- Results include checks for:
|
340 |
- Text length
|