Ozgur Unlu committed on
Commit
a7de25c
·
1 Parent(s): 72d5e40

error fixes for grammar check

Browse files
Files changed (2) hide show
  1. app.py +25 -17
  2. requirements.txt +1 -1
app.py CHANGED
@@ -8,7 +8,7 @@ import os
8
  from pdf_generator import ReportGenerator
9
  from news_checker import NewsChecker
10
  from dotenv import load_dotenv
11
- import language_tool_python # For spell checking
12
 
13
  load_dotenv()
14
 
@@ -20,11 +20,11 @@ def load_models():
20
  hate_model = AutoModelForSequenceClassification.from_pretrained(model_name)
21
 
22
  # Initialize spell checker
23
- spell_tool = language_tool_python.LanguageTool('en-US')
24
 
25
  return {
26
  'hate_speech': (hate_model, hate_tokenizer),
27
- 'spell_check': spell_tool
28
  }
29
 
30
  # Initialize news checker
@@ -68,23 +68,31 @@ def check_hate_speech_and_bias(text, model, tokenizer):
68
  'message': f'Error in hate speech/bias detection: {str(e)}'
69
  }
70
 
71
- def check_spelling(text, spell_tool):
72
  try:
73
- matches = spell_tool.check(text)
74
- spelling_errors = []
 
75
 
76
- for match in matches:
77
- if match.ruleId in ['MORFOLOGIK_RULE_EN_US', 'TYPOS']: # Only check spelling errors
78
- error_word = text[match.offset:match.offset + match.errorLength]
79
- suggestions = match.replacements[:3] # Limit to top 3 suggestions
80
- if suggestions:
81
- spelling_errors.append(f"'{error_word}' -> suggestions: {', '.join(suggestions)}")
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- if spelling_errors:
84
- return {
85
- 'status': 'warning',
86
- 'message': 'Misspelled words found:\n' + '\n'.join(spelling_errors)
87
- }
88
  return {
89
  'status': 'pass',
90
  'message': 'No spelling errors detected'
 
8
  from pdf_generator import ReportGenerator
9
  from news_checker import NewsChecker
10
  from dotenv import load_dotenv
11
+ from spellchecker import SpellChecker
12
 
13
  load_dotenv()
14
 
 
20
  hate_model = AutoModelForSequenceClassification.from_pretrained(model_name)
21
 
22
  # Initialize spell checker
23
+ spell = SpellChecker()
24
 
25
  return {
26
  'hate_speech': (hate_model, hate_tokenizer),
27
+ 'spell_check': spell
28
  }
29
 
30
  # Initialize news checker
 
68
  'message': f'Error in hate speech/bias detection: {str(e)}'
69
  }
70
 
71
+ def check_spelling(text, spell_checker):
72
  try:
73
+ # Split text into words and clean them
74
+ words = text.replace('\n', ' ').split()
75
+ words = [word.strip('.,!?()[]{}":;') for word in words]
76
 
77
+ # Find misspelled words
78
+ misspelled = spell_checker.unknown(words)
79
+
80
+ if misspelled:
81
+ corrections = []
82
+ for word in misspelled:
83
+ # Get the most likely corrections
84
+ candidates = spell_checker.candidates(word)
85
+ if candidates:
86
+ # Take up to 3 suggestions
87
+ suggestions = list(candidates)[:3]
88
+ corrections.append(f"'{word}' -> suggestions: {', '.join(suggestions)}")
89
+
90
+ if corrections:
91
+ return {
92
+ 'status': 'warning',
93
+ 'message': 'Misspelled words found:\n' + '\n'.join(corrections)
94
+ }
95
 
 
 
 
 
 
96
  return {
97
  'status': 'pass',
98
  'message': 'No spelling errors detected'
requirements.txt CHANGED
@@ -8,4 +8,4 @@ pandas==2.1.4
8
  numpy==1.24.3
9
  requests==2.31.0
10
  python-dotenv==1.0.0
11
- language-tool-python==2.7.1
 
8
  numpy==1.24.3
9
  requests==2.31.0
10
  python-dotenv==1.0.0
11
+ pyspellchecker==0.7.2