"""Streamlit app: convert an image of a math expression into LaTeX code.

Uploads an image, runs it through a fine-tuned vision-to-sequence model,
and displays the predicted LaTeX string.
"""

import streamlit as st
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image

# Run on GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def load_model():
    """Load the fine-tuned LaTeX OCR processor and model (cached across reruns).

    Returns:
        tuple: (processor, model) — the HF processor and the
        vision-to-sequence model moved to `device`.
    """
    model_name = "tjoab/latex_finetuned"
    processor = AutoProcessor.from_pretrained(model_name)
    model = AutoModelForVision2Seq.from_pretrained(model_name).to(device)
    return processor, model


processor, model = load_model()

st.title("LaTeX Image to Text Converter")
st.write("Upload an image containing a handwritten or printed math expression, and get the LaTeX code.")

# PNG is accepted: PIL opens PNG natively, and .convert("RGB") below
# flattens RGBA/palette images so the model always sees 3-channel input.
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    # Normalize to RGB so mode (L, P, RGBA, ...) never reaches the processor.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Preprocess image as model expects, then run inference.
    with st.spinner("Processing..."):
        preproc_image = processor.image_processor(image, return_tensors="pt").pixel_values
        preproc_image = preproc_image.to(device)

        # Inference only — disable autograd to save memory and time.
        with torch.no_grad():
            pred_ids = model.generate(preproc_image, max_length=128)
        latex_pred = processor.batch_decode(pred_ids, skip_special_tokens=True)[0]

    st.subheader("Predicted LaTeX Code:")
    st.code(latex_pred, language="latex")