Spaces:

winamnd
/

ocr-llm-test

Running

App Files Community

winamnd committed on Feb 17

Commit

7dedea0

verified ·

1 Parent(s): 2a250f6

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -41

app.py CHANGED Viewed

@@ -73,6 +73,24 @@ def ocr_with_tesseract(img):
     confidences = [1.0] * len(extracted_text)  # Tesseract doesn't return confidence scores
     return extracted_text, confidences
 # OCR & Classification Function
 def generate_ocr(method, img):
     if img is None:
@@ -83,57 +101,33 @@ def generate_ocr(method, img):
     # Select OCR method
     if method == "PaddleOCR":
-        extracted_text = ocr_with_paddle(img)
     elif method == "EasyOCR":
-        extracted_text = ocr_with_easy(img)
-    elif method == "KerasOCR":
-        extracted_text = ocr_with_keras(img)
-    elif method == "TesseractOCR":
-        extracted_text, _ = ocr_with_tesseract(img)  # Ignore confidence values
-    else:
-        return "Invalid OCR method", "N/A"
-    # Clean text
-    extracted_text = extracted_text.strip()
-    if not extracted_text:
-        return "No text detected!", "Cannot classify"
-    # Debugging: Print extracted text
-    print(f"Extracted Text: {extracted_text}")
-    # Tokenize input
-    inputs = tokenizer(
-        extracted_text,
-        return_tensors="pt",
-        truncation=True,
-        padding="max_length",
-        max_length=512
-    )
-    # Move tensors to the same device as the model
-    inputs = {key: val.to(model.device) for key, val in inputs.items()}
     # Perform inference
     with torch.no_grad():
         outputs = model(**inputs)
-        logits = outputs.logits
-    # Debugging: Print logits
-    print(f"Logits: {logits}")
-    # Use argmax to classify
-    predicted_class = torch.argmax(logits, dim=1).item()
-    label_map = {0: "Not Spam", 1: "Spam"}
-    label = label_map.get(predicted_class, "Unknown")
-    # Debugging: Print final classification
-    print(f"Predicted Class: {predicted_class}, Label: {label}")
-    # Save results
-    save_results_to_repo(extracted_text, label)
-    return extracted_text, label
 # Gradio Interface
 image_input = gr.Image()

     confidences = [1.0] * len(extracted_text)  # Tesseract doesn't return confidence scores
     return extracted_text, confidences
+# OCR & Classification Function
+def ocr_with_paddle(img):
+    ocr = PaddleOCR(lang='en', use_angle_cls=True)
+    result = ocr.ocr(img)
+    return ' '.join([item[1][0] for item in result[0]])
+def ocr_with_keras(img):
+    pipeline = keras_ocr.pipeline.Pipeline()
+    images = [keras_ocr.tools.read(img)]
+    predictions = pipeline.recognize(images)
+    return ' '.join([text for text, _ in predictions[0]])
+def ocr_with_easy(img):
+    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    reader = easyocr.Reader(['en'])
+    results = reader.readtext(gray_image, detail=0)
+    return ' '.join(results)
 # OCR & Classification Function
 def generate_ocr(method, img):
     if img is None:
     # Select OCR method
     if method == "PaddleOCR":
+        text_output = ocr_with_paddle(img)
     elif method == "EasyOCR":
+        text_output = ocr_with_easy(img)
+    else:  # KerasOCR
+        text_output = ocr_with_keras(img)
+    # Preprocess text properly
+    text_output = text_output.strip()
+    if len(text_output) == 0:
+        return "No text detected!", "Cannot classify"
+    # Tokenize text
+    inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Perform inference
     with torch.no_grad():
         outputs = model(**inputs)
+        probs = F.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
+        spam_prob = probs[0][1].item()  # Probability of Spam
+    # Adjust classification based on threshold (better than argmax)
+    label = "Spam" if spam_prob > 0.5 else "Not Spam"
+    # Save results using external function
+    save_results_to_repo(text_output, label)
+    return text_output, label
 # Gradio Interface
 image_input = gr.Image()