Spaces:

UniquePratham
/

DualTextOCRFusion

Sleeping

File size: 2,863 Bytes

3cb2a3f
a0652de
3cb2a3f
 
a0652de
 
3cb2a3f
 
 
 
a0652de
 
 
 
3cb2a3f
a0652de
3cb2a3f
 
a0652de
 
 
3cb2a3f
 
a0652de
 
 
 
 
3cb2a3f
 
 
 
a0652de
 
3cb2a3f
a0652de
 
 
 
 
 
 
 
3cb2a3f
a0652de
3cb2a3f
a0652de
 
3cb2a3f
 
a0652de
3cb2a3f
a0652de
 
 
3cb2a3f
a0652de
 
 
 
 
3cb2a3f
a0652de
3cb2a3f
a0652de
 
 
3cb2a3f
a0652de

import streamlit as st
from ocr_cpu import extract_text_got, extract_text_qwen, extract_text_llama, clean_extracted_text
import json

# ---------------------------------------------------------------------------
# Static UI scaffolding: page configuration, custom CSS, heading, and the two
# input widgets (image uploader + sidebar model selector).
# ---------------------------------------------------------------------------
_PAGE_ICON = "📄"
_APP_NAME = "MultiModel OCR Fusion"
_IMAGE_EXTENSIONS = ["jpg", "jpeg", "png"]
_MODEL_OPTIONS = ("GOT", "Qwen", "LLaMA")

# Raw HTML/CSS injected into the page; the leading indentation is part of the
# string and is kept exactly as-is.
_CUSTOM_CSS = """
    <style>
    .reportview-container { background: #f4f4f4; }
    .sidebar .sidebar-content { background: #e0e0e0; }
    h1 { color: #007BFF; }
    .upload-btn { background-color: #007BFF; color: white; padding: 10px; border-radius: 5px; text-align: center; }
    </style>
    """

st.set_page_config(
    page_title=_APP_NAME,
    layout="centered",
    page_icon=_PAGE_ICON,
)
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Heading and short usage hint.
st.title(f"{_PAGE_ICON} {_APP_NAME}")
st.write(
    "Upload an image to extract and clean text using multiple OCR models "
    "(GOT, Qwen, LLaMA)."
)

# Image upload widget; the value is None until the user provides a file.
uploaded_file = st.file_uploader("Upload an image file", type=_IMAGE_EXTENSIONS)

# Sidebar: choose which OCR backend processes the uploaded image.
st.sidebar.title("Model Selection")
model_choice = st.sidebar.selectbox("Choose OCR Model", _MODEL_OPTIONS)

if uploaded_file is not None:
    # NOTE(review): `use_column_width` is deprecated in recent Streamlit
    # releases in favour of `use_container_width`; kept here so the script
    # still runs on the Streamlit version this app was written against.
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Map each sidebar choice to the extractor that handles it.
    extractors = {
        "GOT": extract_text_got,
        "Qwen": extract_text_qwen,
        "LLaMA": extract_text_llama,
    }

    # Define both results up front so every path below (success, "no text"
    # warning, or exception) leaves them bound. Previously `cleaned_text` was
    # unset when the extractor returned blank text, causing a NameError when
    # the "Cleaned Text" area was rendered.
    extracted_text, cleaned_text = "", ""

    # Extract text from the image based on the selected model.
    with st.spinner(f"Extracting text using the {model_choice} model..."):
        try:
            # Treat a None result the same as an empty string.
            extracted_text = extractors[model_choice](uploaded_file) or ""

            if extracted_text.strip():
                # Clean the extracted text; fall back to "" on a None result
                # so the keyword search below can always call `.lower()`.
                cleaned_text = clean_extracted_text(extracted_text) or ""
            else:
                st.warning(f"No text extracted using {model_choice}.")
        except Exception as e:
            # Top-level UI boundary: report the failure to the user and
            # continue rendering the page with empty results.
            st.error(f"Error during text extraction: {e}")
            extracted_text, cleaned_text = "", ""

    # --- Display Extracted and Cleaned Text ---
    st.subheader(f"Extracted Text using {model_choice}")
    st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)

    st.subheader("Cleaned Text (AI-processed)")
    st.text_area("Cleaned Text", cleaned_text, height=200)

    # Save the raw extracted text for further use. A failed write is
    # reported but does not stop the rest of the page from rendering.
    if extracted_text:
        try:
            with open("extracted_text.json", "w", encoding="utf-8") as json_file:
                json.dump({"text": extracted_text}, json_file)
        except OSError as e:
            st.warning(f"Could not save extracted text: {e}")

    # --- Keyword Search ---
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the extracted text")

    # Case-insensitive substring match against the cleaned text.
    if keyword:
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the cleaned text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")