Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,7 @@ def ocr_with_easy(img):
|
|
26 |
reader = easyocr.Reader(['en'])
|
27 |
bounds = reader.readtext('image.png', paragraph="False", detail=0)
|
28 |
extracted_text = ' '.join(bounds)
|
|
|
29 |
return extracted_text
|
30 |
|
31 |
"""
|
@@ -37,7 +38,9 @@ def preprocess_text(text):
|
|
37 |
filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
|
38 |
stemmer = PorterStemmer()
|
39 |
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
|
40 |
-
|
|
|
|
|
41 |
|
42 |
"""
|
43 |
Load and Train Spam Classifier
|
@@ -67,9 +70,12 @@ def ocr_and_classify_spam(img):
|
|
67 |
# Step 2: Preprocess and classify the extracted text
|
68 |
if extracted_text:
|
69 |
processed_text = preprocess_text(extracted_text)
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
73 |
else:
|
74 |
spam_result = "No text found in the image."
|
75 |
|
|
|
26 |
reader = easyocr.Reader(['en'])
|
27 |
bounds = reader.readtext('image.png', paragraph="False", detail=0)
|
28 |
extracted_text = ' '.join(bounds)
|
29 |
+
print("Extracted Text:", extracted_text) # Debugging line
|
30 |
return extracted_text
|
31 |
|
32 |
"""
|
|
|
38 |
filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
|
39 |
stemmer = PorterStemmer()
|
40 |
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
|
41 |
+
processed_text = ' '.join(stemmed_tokens)
|
42 |
+
print("Processed Text:", processed_text) # Debugging line
|
43 |
+
return processed_text
|
44 |
|
45 |
"""
|
46 |
Load and Train Spam Classifier
|
|
|
70 |
# Step 2: Preprocess and classify the extracted text
|
71 |
if extracted_text:
|
72 |
processed_text = preprocess_text(extracted_text)
|
73 |
+
if processed_text: # Check if text is not empty after preprocessing
|
74 |
+
input_tfidf = tfidf_vectorizer.transform([processed_text])
|
75 |
+
prediction = rf_classifier.predict(input_tfidf)
|
76 |
+
spam_result = "SPAM" if prediction[0] == 1 else "NOT SPAM"
|
77 |
+
else:
|
78 |
+
spam_result = "No valid text to classify."
|
79 |
else:
|
80 |
spam_result = "No text found in the image."
|
81 |
|