Spaces:

winamnd
/

ocr-llm-test

Running

winamnd committed on Feb 16

Commit

deb409e

verified ·

1 Parent(s): 1269497

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,8 +9,8 @@ import easyocr
 import keras_ocr
 from paddleocr import PaddleOCR
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
-import torch.nn.functional as F  # Added for softmax
-from save_results import save_results_to_repo  # Import the new save function
 # Paths
 MODEL_PATH = "./distilbert_spam_model"
@@ -29,6 +29,9 @@ else:
     model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
     tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
 # Load OCR Methods
 def ocr_with_paddle(img):
     ocr = PaddleOCR(lang='en', use_angle_cls=True)
@@ -63,6 +66,11 @@ def generate_ocr(method, img):
     else:  # KerasOCR
         text_output = ocr_with_keras(img)
     # Classify Text as Spam or Not Spam
     inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
@@ -71,7 +79,7 @@ def generate_ocr(method, img):
         probs = F.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
         prediction = torch.argmax(probs, dim=1).item()
-    label_map = {0: "Spam", 1: "Not Spam"}
     label = label_map[prediction]
     # Save results using the external save function
@@ -96,4 +104,4 @@ demo = gr.Interface(
 # Launch App
 if __name__ == "__main__":
-    demo.launch()

 import keras_ocr
 from paddleocr import PaddleOCR
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import torch.nn.functional as F
+from save_results import save_results_to_repo
 # Paths
 MODEL_PATH = "./distilbert_spam_model"
     model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
     tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
+# Set the model to evaluation mode to disable dropout layers
+model.eval()
 # Load OCR Methods
 def ocr_with_paddle(img):
     ocr = PaddleOCR(lang='en', use_angle_cls=True)
     else:  # KerasOCR
         text_output = ocr_with_keras(img)
+    # Clean and truncate the extracted text
+    text_output = text_output.strip()
+    if len(text_output) == 0:
+        return "No text detected!", "Cannot classify"
     # Classify Text as Spam or Not Spam
     inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
         probs = F.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
         prediction = torch.argmax(probs, dim=1).item()
+    label_map = {0: "Not Spam", 1: "Spam"}
     label = label_map[prediction]
     # Save results using the external save function
 # Launch App
 if __name__ == "__main__":
+    demo.launch()