winamnd committed
Commit ffe536a · verified · 1 Parent(s): db8a1e5

Update app.py

Files changed (1)
  1. app.py +12 -9
app.py CHANGED
@@ -100,7 +100,7 @@ def generate_ocr(method, image):
     if len(text_output) == 0:
         return "No text detected!", "Cannot classify"
 
-    # Tokenize text for LLM classification
+    # Tokenize text for classification
     inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
 
     # Perform inference
@@ -108,27 +108,30 @@ def generate_ocr(method, image):
     outputs = model(**inputs)
     logits = outputs.logits  # Get raw logits
 
-    # Print raw logits for debugging
+    # Print raw logits to debug
    print(f"Raw logits: {logits}")
 
-    # Convert logits to probabilities
+    # Convert logits to probabilities using softmax
     probs = F.softmax(logits, dim=1)
-
+
     # Extract probability values
     not_spam_prob = probs[0, 0].item()
     spam_prob = probs[0, 1].item()
 
-    # Print probabilities for debugging
+    # Print probability values for debugging
     print(f"Not Spam Probability: {not_spam_prob}, Spam Probability: {spam_prob}")
 
-    # Use a classification threshold to avoid bias
-    threshold = 0.55  # Adjust based on observations
-    if spam_prob >= threshold:
+    # Ensure correct label mapping
+    predicted_class = torch.argmax(probs, dim=1).item()  # Get predicted class index
+    print(f"Predicted Class Index: {predicted_class}")  # Debugging output
+
+    # Check if the labels are flipped
+    if predicted_class == 1:
         label = "Spam"
     else:
         label = "Not Spam"
 
-    # Save results using external function
+    # Save results
     save_results_to_repo(text_output, label)
 
     return text_output, label
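For context, this commit swaps the fixed 0.55 spam threshold for an argmax over the softmax probabilities. Below is a minimal standalone sketch of that decision step, assuming a two-class output head with index 0 = not spam and index 1 = spam; the logits are placeholder values for illustration, not taken from app.py.

import torch
import torch.nn.functional as F

# Placeholder logits for a single input: [not_spam, spam]. Real values come from the model.
logits = torch.tensor([[1.2, -0.3]])

probs = F.softmax(logits, dim=1)                     # convert raw logits to probabilities
predicted_class = torch.argmax(probs, dim=1).item()  # 0 = not spam, 1 = spam

label = "Spam" if predicted_class == 1 else "Not Spam"
print(f"probs={probs.tolist()}, predicted_class={predicted_class}, label={label}")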