winamnd committed on
Commit
70ac79e
·
verified ·
1 Parent(s): ed06b10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -26,6 +26,7 @@ def ocr_with_easy(img):
26
  reader = easyocr.Reader(['en'])
27
  bounds = reader.readtext('image.png', paragraph="False", detail=0)
28
  extracted_text = ' '.join(bounds)
 
29
  return extracted_text
30
 
31
  """
@@ -37,7 +38,9 @@ def preprocess_text(text):
37
  filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
38
  stemmer = PorterStemmer()
39
  stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
40
- return ' '.join(stemmed_tokens)
 
 
41
 
42
  """
43
  Load and Train Spam Classifier
@@ -67,9 +70,12 @@ def ocr_and_classify_spam(img):
67
  # Step 2: Preprocess and classify the extracted text
68
  if extracted_text:
69
  processed_text = preprocess_text(extracted_text)
70
- input_tfidf = tfidf_vectorizer.transform([processed_text])
71
- prediction = rf_classifier.predict(input_tfidf)
72
- spam_result = "SPAM" if prediction[0] == 1 else "NOT SPAM"
 
 
 
73
  else:
74
  spam_result = "No text found in the image."
75
 
 
26
  reader = easyocr.Reader(['en'])
27
  bounds = reader.readtext('image.png', paragraph="False", detail=0)
28
  extracted_text = ' '.join(bounds)
29
+ print("Extracted Text:", extracted_text) # Debugging line
30
  return extracted_text
31
 
32
  """
 
38
  filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
39
  stemmer = PorterStemmer()
40
  stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
41
+ processed_text = ' '.join(stemmed_tokens)
42
+ print("Processed Text:", processed_text) # Debugging line
43
+ return processed_text
44
 
45
  """
46
  Load and Train Spam Classifier
 
70
  # Step 2: Preprocess and classify the extracted text
71
  if extracted_text:
72
  processed_text = preprocess_text(extracted_text)
73
+ if processed_text: # Check if text is not empty after preprocessing
74
+ input_tfidf = tfidf_vectorizer.transform([processed_text])
75
+ prediction = rf_classifier.predict(input_tfidf)
76
+ spam_result = "SPAM" if prediction[0] == 1 else "NOT SPAM"
77
+ else:
78
+ spam_result = "No valid text to classify."
79
  else:
80
  spam_result = "No text found in the image."
81