winamnd committed on
Commit
14299e0
·
verified ·
1 Parent(s): 07dea49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -13
app.py CHANGED
@@ -19,12 +19,12 @@ RESULTS_CSV = "ocr_results.csv"
19
 
20
  # Ensure model exists
21
  if not os.path.exists(os.path.join(MODEL_PATH, "pytorch_model.bin")):
22
- print(f" Model not found in {MODEL_PATH}. Downloading from Hugging Face Hub...")
23
  model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
24
  model.save_pretrained(MODEL_PATH)
25
  tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
26
  tokenizer.save_pretrained(MODEL_PATH)
27
- print(f" Model saved at {MODEL_PATH}.")
28
  else:
29
  model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
30
  tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
@@ -47,13 +47,6 @@ def ocr_with_easy(img):
47
  results = reader.readtext(gray_image, detail=0)
48
  return ' '.join(results)
49
 
50
- # Preprocess Text
51
- def preprocess_text(text):
52
- # Clean up the text by removing unwanted characters
53
- text = text.strip() # Remove leading/trailing whitespace
54
- text = ' '.join(text.split()) # Normalize spaces
55
- return text
56
-
57
  # OCR Function
58
  def generate_ocr(method, img):
59
  if img is None:
@@ -70,9 +63,6 @@ def generate_ocr(method, img):
70
  else: # KerasOCR
71
  text_output = ocr_with_keras(img)
72
 
73
- # Preprocess the text before feeding to the model
74
- text_output = preprocess_text(text_output)
75
-
76
  # Classify Text as Spam or Not Spam
77
  inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
78
 
@@ -106,4 +96,4 @@ demo = gr.Interface(
106
 
107
  # Launch App
108
  if __name__ == "__main__":
109
- demo.launch()
 
19
 
20
  # Ensure model exists
21
  if not os.path.exists(os.path.join(MODEL_PATH, "pytorch_model.bin")):
22
+ print(f"⚠️ Model not found in {MODEL_PATH}. Downloading from Hugging Face Hub...")
23
  model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
24
  model.save_pretrained(MODEL_PATH)
25
  tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
26
  tokenizer.save_pretrained(MODEL_PATH)
27
+ print(f" Model saved at {MODEL_PATH}.")
28
  else:
29
  model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
30
  tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
 
47
  results = reader.readtext(gray_image, detail=0)
48
  return ' '.join(results)
49
 
 
 
 
 
 
 
 
50
  # OCR Function
51
  def generate_ocr(method, img):
52
  if img is None:
 
63
  else: # KerasOCR
64
  text_output = ocr_with_keras(img)
65
 
 
 
 
66
  # Classify Text as Spam or Not Spam
67
  inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
68
 
 
96
 
97
  # Launch App
98
  if __name__ == "__main__":
99
+ demo.launch()