import os
from transformers import AutoModel, AutoTokenizer
import torch
# Load model and tokenizer
model_name = "ucaslcl/GOT-OCR2_0"
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True
)
# Load the model
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
)
# Ensure the model is in evaluation mode, on CPU, and in float32
device = torch.device("cpu")
dtype = torch.float32  # float32 is the safe default dtype on CPU
model = model.eval().to(device=device, dtype=dtype)
# OCR function
def extract_text_got(uploaded_file):
    """Use the GOT-OCR2.0 model to extract text from the uploaded image."""

    def _normalize(outputs):
        """model.chat may return a string or a list of strings; reduce it to one stripped string."""
        if isinstance(outputs, (list, tuple)):
            outputs = outputs[0] if outputs else ""
        return (outputs or "").strip()

    temp_file_path = 'temp_image.jpg'
    try:
        with open(temp_file_path, 'wb') as temp_file:
            temp_file.write(uploaded_file.read())  # Save the uploaded file to disk

        # OCR configurations to try, in order of preference
        ocr_types = ['ocr', 'format']
        fine_grained_options = ['ocr', 'format']
        color_options = ['red', 'green', 'blue']
        box = [10, 10, 100, 100]  # Example bounding box for fine-grained OCR
        multi_crop_types = ['ocr', 'format']

        results = []

        # Plain OCR (autocast is unnecessary on CPU)
        for ocr_type in ocr_types:
            with torch.no_grad():
                outputs = model.chat(
                    tokenizer, temp_file_path, ocr_type=ocr_type
                )
            text = _normalize(outputs)
            if text:
                return text  # Return as soon as one attempt succeeds
            results.append(text)

        # Fine-grained OCR restricted to a bounding box
        for ocr_type in fine_grained_options:
            with torch.no_grad():
                outputs = model.chat(
                    tokenizer, temp_file_path, ocr_type=ocr_type, ocr_box=box
                )
            text = _normalize(outputs)
            if text:
                return text
            results.append(text)

        # Fine-grained OCR guided by colored boxes
        for ocr_type in fine_grained_options:
            for color in color_options:
                with torch.no_grad():
                    outputs = model.chat(
                        tokenizer, temp_file_path, ocr_type=ocr_type, ocr_color=color
                    )
                text = _normalize(outputs)
                if text:
                    return text
                results.append(text)

        # Multi-crop OCR for large or dense pages
        for ocr_type in multi_crop_types:
            with torch.no_grad():
                outputs = model.chat_crop(
                    tokenizer, temp_file_path, ocr_type=ocr_type
                )
            text = _normalize(outputs)
            if text:
                return text
            results.append(text)

        # If no attempt produced any text, say so; otherwise join whatever was found
        non_empty = [text for text in results if text]
        if not non_empty:
            return "No text extracted."
        return "\n".join(non_empty)
    except Exception as e:
        return f"Error during text extraction: {str(e)}"
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
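
# Minimal usage sketch (not part of the original Space; the file name is hypothetical).
# extract_text_got expects a file-like object with a .read() method, e.g. a
# Streamlit/Gradio upload or a local image opened in binary mode.
if __name__ == "__main__":
    with open("sample_page.jpg", "rb") as f:
        print(extract_text_got(f))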