Ozgur Unlu committed on
Commit
5b4acc0
·
1 Parent(s): d1f1bbf

fixes spell check and adds max length

Browse files
Files changed (1) hide show
  1. app.py +30 -36
app.py CHANGED
@@ -50,28 +50,18 @@ def load_models():
50
  # Initialize news checker
51
  news_checker = NewsChecker()
52
 
53
- def check_text_length(text, max_length):
54
- try:
55
- max_length = int(max_length)
56
- if max_length > 1000:
57
- return {
58
- 'status': 'fail',
59
- 'message': 'Maximum length cannot exceed 1000 characters'
60
- }
61
- if len(text) > max_length:
62
- return {
63
- 'status': 'fail',
64
- 'message': f'Text exceeds {max_length} character limit'
65
- }
66
- return {
67
- 'status': 'pass',
68
- 'message': 'Text length is within limits'
69
- }
70
- except ValueError:
71
  return {
72
  'status': 'fail',
73
- 'message': 'Invalid maximum length value'
74
  }
 
 
 
 
75
 
76
  def check_hate_speech_and_bias(text, model, tokenizer):
77
  try:
@@ -184,8 +174,8 @@ def check_spelling(text, spell_checker):
184
  # Normalize apostrophes in the word
185
  word = normalize_apostrophes(word)
186
 
187
- # Remove surrounding punctuation but keep internal apostrophes
188
- cleaned = re.sub(r'^[^\w\']+|[^\w\']+$', '', word)
189
 
190
  # Skip empty strings
191
  if not cleaned:
@@ -194,6 +184,18 @@ def check_spelling(text, spell_checker):
194
  # Skip if the word is in our contractions list
195
  if cleaned.lower() in CONTRACTIONS:
196
  continue
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  # Skip special cases
199
  if (cleaned.isdigit() or # Skip numbers
@@ -225,7 +227,7 @@ def check_spelling(text, spell_checker):
225
  'message': f'Error in spell check: {str(e)}'
226
  }
227
 
228
- def analyze_content(text, max_length):
229
  try:
230
  # Initialize report generator
231
  report_gen = ReportGenerator()
@@ -239,7 +241,7 @@ def analyze_content(text, max_length):
239
  results = {}
240
 
241
  # 1. Length Check
242
- length_result = check_text_length(text, max_length)
243
  results['Length Check'] = length_result
244
  report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
245
 
@@ -306,14 +308,6 @@ def create_interface():
306
 
307
  with gr.Row():
308
  with gr.Column():
309
- max_length = gr.Number(
310
- label="Check for max length:",
311
- value=500,
312
- minimum=1,
313
- maximum=1000,
314
- step=1,
315
- interactive=True
316
- )
317
  input_text = gr.TextArea(
318
  label="Marketing Content",
319
  placeholder="Enter your marketing content here (max 1000 characters)...",
@@ -330,17 +324,17 @@ def create_interface():
330
  report_output = gr.File(label="Download Report")
331
 
332
  analyze_btn.click(
333
- fn=lambda text, max_len: (
334
- format_results(analyze_content(text, max_len)[0]),
335
- analyze_content(text, max_len)[1]
336
  ),
337
- inputs=[input_text, max_length],
338
  outputs=[output_text, report_output]
339
  )
340
 
341
  gr.Markdown("""
342
  ### Notes:
343
- - Maximum allowed length: 1000 characters
344
  - Analysis may take up to 2 minutes
345
  - Results include checks for:
346
  - Text length
 
50
  # Initialize news checker
51
  news_checker = NewsChecker()
52
 
53
+ def check_text_length(text):
54
+ """Check if text length is within the 1000 character limit and return character count"""
55
+ char_count = len(text)
56
+ if char_count > 1000:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return {
58
  'status': 'fail',
59
+ 'message': f'Text length: {char_count}/1000 characters (exceeds maximum limit)'
60
  }
61
+ return {
62
+ 'status': 'pass',
63
+ 'message': f'Text length: {char_count}/1000 characters'
64
+ }
65
 
66
  def check_hate_speech_and_bias(text, model, tokenizer):
67
  try:
 
174
  # Normalize apostrophes in the word
175
  word = normalize_apostrophes(word)
176
 
177
+ # Remove surrounding punctuation but keep internal apostrophes and hyphens
178
+ cleaned = re.sub(r'^[^\w\'\-]+|[^\w\'\-]+$', '', word)
179
 
180
  # Skip empty strings
181
  if not cleaned:
 
184
  # Skip if the word is in our contractions list
185
  if cleaned.lower() in CONTRACTIONS:
186
  continue
187
+
188
+ # Handle hyphenated words
189
+ if '-' in cleaned:
190
+ parts = cleaned.split('-')
191
+ # Check if each part is valid
192
+ all_parts_valid = all(
193
+ part.lower() in spell_checker.word_frequency
194
+ for part in parts
195
+ if part # Skip empty parts
196
+ )
197
+ if all_parts_valid:
198
+ continue
199
 
200
  # Skip special cases
201
  if (cleaned.isdigit() or # Skip numbers
 
227
  'message': f'Error in spell check: {str(e)}'
228
  }
229
 
230
+ def analyze_content(text):
231
  try:
232
  # Initialize report generator
233
  report_gen = ReportGenerator()
 
241
  results = {}
242
 
243
  # 1. Length Check
244
+ length_result = check_text_length(text)
245
  results['Length Check'] = length_result
246
  report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
247
 
 
308
 
309
  with gr.Row():
310
  with gr.Column():
 
 
 
 
 
 
 
 
311
  input_text = gr.TextArea(
312
  label="Marketing Content",
313
  placeholder="Enter your marketing content here (max 1000 characters)...",
 
324
  report_output = gr.File(label="Download Report")
325
 
326
  analyze_btn.click(
327
+ fn=lambda text: (
328
+ format_results(analyze_content(text)[0]),
329
+ analyze_content(text)[1]
330
  ),
331
+ inputs=input_text,
332
  outputs=[output_text, report_output]
333
  )
334
 
335
  gr.Markdown("""
336
  ### Notes:
337
+ - Maximum text length: 1000 characters
338
  - Analysis may take up to 2 minutes
339
  - Results include checks for:
340
  - Text length