File size: 1,652 Bytes
323d5ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import io

import requests
import streamlit as st
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

@st.cache_resource
def load_model():
    """Load the TrOCR processor and model, reusing them across script reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, both pretrained objects would be
    re-instantiated on each rerun; ``st.cache_resource`` keeps one shared
    instance alive for the lifetime of the server process instead.

    Returns:
        A ``(processor, model)`` tuple, both created from the
        ``microsoft/trocr-base-printed`` checkpoint.
    """
    checkpoint = 'microsoft/trocr-base-printed'
    processor = TrOCRProcessor.from_pretrained(checkpoint)
    model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
    return processor, model

def process_image(image, processor, model):
    """Run OCR on ``image`` and return the first decoded string.

    Args:
        image: The image handed to ``processor`` via its ``images`` keyword.
        processor: Callable that yields an object exposing ``pixel_values``
            and that also provides ``batch_decode``.
        model: Object whose ``generate`` method turns pixel values into
            token ids.

    Returns:
        The first entry of the decoded batch, with special tokens skipped.
    """
    encoded = processor(images=image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded_batch = processor.batch_decode(token_ids, skip_special_tokens=True)
    # Only one image is submitted, so only the first result is returned.
    return decoded_batch[0]

# Page heading
st.title("Print OCR with TrOCR")

# The processor/model pair is shared by every OCR request further down.
processor, model = load_model()

# Let the user supply their own picture (JPEG or PNG only).
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Decode the upload and normalise it to RGB before display and OCR.
    opened_upload = Image.open(uploaded_file)
    image = opened_upload.convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Show a spinner while recognition is running.
    with st.spinner("Extracting text..."):
        extracted_text = process_image(image, processor, model)

    # Present the recognised text under its own heading.
    st.subheader("Extracted Text:")
    st.write(extracted_text)

# Example URL processing: fetch a fixed sample image and run OCR on it.
# NOTE(review): the URL looks like a handwriting sample while the loaded
# checkpoint is the "printed" one — confirm this is the intended example.
st.write("Or try with an example image:")
default_url = "https://fki.tic.heia-fr.ch/static/img/a01-122-02-00.jpg"
if st.button("Process Example Image"):
    try:
        # The timeout stops a stalled server from hanging the app, and
        # raise_for_status() rejects HTTP error pages before PIL sees them.
        response = requests.get(default_url, timeout=10)
        response.raise_for_status()
        # Read the fully downloaded body rather than the raw socket stream,
        # so the connection is released and any content-encoding is decoded.
        image = Image.open(io.BytesIO(response.content)).convert("RGB")
    except (requests.RequestException, OSError) as exc:
        # PIL raises OSError subclasses for unreadable image data.
        st.error(f"Could not load the example image: {exc}")
    else:
        st.image(image, caption="Example Image", use_column_width=True)
        with st.spinner("Extracting text..."):
            extracted_text = process_image(image, processor, model)
        st.subheader("Extracted Text:")
        st.write(extracted_text)