Spaces:

ozgurunlu
/

m-check

Sleeping

App Files Files Community

Ozgur Unlu committed on Nov 6, 2024

Commit

5b4acc0

1 Parent(s): d1f1bbf

fixes spell check and adds max length

Browse files

Files changed (1) hide show

app.py +30 -36

app.py CHANGED Viewed

@@ -50,28 +50,18 @@ def load_models():
 # Initialize news checker
 news_checker = NewsChecker()
-def check_text_length(text, max_length):
-    try:
-        max_length = int(max_length)
-        if max_length > 1000:
-            return {
-                'status': 'fail',
-                'message': 'Maximum length cannot exceed 1000 characters'
-            }
-        if len(text) > max_length:
-            return {
-                'status': 'fail',
-                'message': f'Text exceeds {max_length} character limit'
-            }
-        return {
-            'status': 'pass',
-            'message': 'Text length is within limits'
-        }
-    except ValueError:
         return {
             'status': 'fail',
-            'message': 'Invalid maximum length value'
         }
 def check_hate_speech_and_bias(text, model, tokenizer):
     try:
@@ -184,8 +174,8 @@ def check_spelling(text, spell_checker):
             # Normalize apostrophes in the word
             word = normalize_apostrophes(word)
-            # Remove surrounding punctuation but keep internal apostrophes
-            cleaned = re.sub(r'^[^\w\']+|[^\w\']+$', '', word)
             # Skip empty strings
             if not cleaned:
@@ -194,6 +184,18 @@ def check_spelling(text, spell_checker):
             # Skip if the word is in our contractions list
             if cleaned.lower() in CONTRACTIONS:
                 continue
             # Skip special cases
             if (cleaned.isdigit() or               # Skip numbers
@@ -225,7 +227,7 @@ def check_spelling(text, spell_checker):
             'message': f'Error in spell check: {str(e)}'
         }
-def analyze_content(text, max_length):
     try:
         # Initialize report generator
         report_gen = ReportGenerator()
@@ -239,7 +241,7 @@ def analyze_content(text, max_length):
         results = {}
         # 1. Length Check
-        length_result = check_text_length(text, max_length)
         results['Length Check'] = length_result
         report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
@@ -306,14 +308,6 @@ def create_interface():
         with gr.Row():
             with gr.Column():
-                max_length = gr.Number(
-                    label="Check for max length:",
-                    value=500,
-                    minimum=1,
-                    maximum=1000,
-                    step=1,
-                    interactive=True
-                )
                 input_text = gr.TextArea(
                     label="Marketing Content",
                     placeholder="Enter your marketing content here (max 1000 characters)...",
@@ -330,17 +324,17 @@ def create_interface():
                 report_output = gr.File(label="Download Report")
         analyze_btn.click(
-            fn=lambda text, max_len: (
-                format_results(analyze_content(text, max_len)[0]),
-                analyze_content(text, max_len)[1]
             ),
-            inputs=[input_text, max_length],
             outputs=[output_text, report_output]
         )
         gr.Markdown("""
         ### Notes:
-        - Maximum allowed length: 1000 characters
         - Analysis may take up to 2 minutes
         - Results include checks for:
           - Text length

 # Initialize news checker
 news_checker = NewsChecker()
+def check_text_length(text):
+    """Check if text length is within the 1000 character limit and return character count"""
+    char_count = len(text)
+    if char_count > 1000:
         return {
             'status': 'fail',
+            'message': f'Text length: {char_count}/1000 characters (exceeds maximum limit)'
         }
+    return {
+        'status': 'pass',
+        'message': f'Text length: {char_count}/1000 characters'
+    }
 def check_hate_speech_and_bias(text, model, tokenizer):
     try:
             # Normalize apostrophes in the word
             word = normalize_apostrophes(word)
+            # Remove surrounding punctuation but keep internal apostrophes and hyphens
+            cleaned = re.sub(r'^[^\w\'\-]+|[^\w\'\-]+$', '', word)
             # Skip empty strings
             if not cleaned:
             # Skip if the word is in our contractions list
             if cleaned.lower() in CONTRACTIONS:
                 continue
+            # Handle hyphenated words
+            if '-' in cleaned:
+                parts = cleaned.split('-')
+                # Check if each part is valid
+                all_parts_valid = all(
+                    part.lower() in spell_checker.word_frequency
+                    for part in parts
+                    if part  # Skip empty parts
+                )
+                if all_parts_valid:
+                    continue
             # Skip special cases
             if (cleaned.isdigit() or               # Skip numbers
             'message': f'Error in spell check: {str(e)}'
         }
+def analyze_content(text):
     try:
         # Initialize report generator
         report_gen = ReportGenerator()
         results = {}
         # 1. Length Check
+        length_result = check_text_length(text)
         results['Length Check'] = length_result
         report_gen.add_check_result("Length Check", length_result['status'], length_result['message'])
         with gr.Row():
             with gr.Column():
                 input_text = gr.TextArea(
                     label="Marketing Content",
                     placeholder="Enter your marketing content here (max 1000 characters)...",
                 report_output = gr.File(label="Download Report")
         analyze_btn.click(
+            fn=lambda text: (
+                format_results(analyze_content(text)[0]),
+                analyze_content(text)[1]
             ),
+            inputs=input_text,
             outputs=[output_text, report_output]
         )
         gr.Markdown("""
         ### Notes:
+        - Maximum text length: 1000 characters
         - Analysis may take up to 2 minutes
         - Results include checks for:
           - Text length