# Spaces:
#
# ashen007 / yolo-document-layout-demo
#
# Running
#
# File size: 2,449 Bytes

import gradio as gr
from ultralytics import YOLO
from PIL import Image
from huggingface_hub import hf_hub_download
import numpy as np
import os

# Load the YOLO model. The "hf://<user>/<repo>" form is tried first; if that
# raises for any reason, the weights file is fetched explicitly with
# hf_hub_download and the model is loaded from the returned local path.
# NOTE(review): confirm the installed ultralytics version accepts the
# "hf://" scheme - if it does not, the fallback below runs on every start.
model_repo = "hf://ashen007/document-structure-detection"
try:
    model = YOLO(model_repo)
    print(f"Successfully loaded model from {model_repo}")
except Exception as e:
    # Report why the first attempt failed instead of discarding the error,
    # so a permanently broken primary path is visible in the logs.
    print(f"Could not load model from {model_repo}: {e}")
    model_path = hf_hub_download(
        repo_id="ashen007/document-structure-detection",
        filename="DSD-YOLOv8-v2.pt",
    )
    model = YOLO(model_path)
    print(f"Successfully loaded model from {model_path}")

# Names of the layout classes, one entry per class. Nothing in the visible
# part of this file reads the list; keep it in step with the model's own
# label set if it is used elsewhere.
class_names = [
    "Author",
    "Bigletter",
    "Bleeding",
    "Bold",
    "Caption",
    "Date",
    "Figure",
    "Footnote",
    "Header",
    "Italic",
    "List",
    "Map",
    "SubSubTitle",
    "SubTitle",
    "Table",
    "TextColumn",
    "Title",
    "Underline",
    "equations",
]

def predict(image):
    """
    Run layout detection on ``image`` and return the annotated picture.

    Args:
        image: The picture to analyse. The Gradio input in this file
            delivers a ``PIL.Image.Image``; a NumPy array is also passed
            through to the model unchanged, in which case it must follow
            the model library's own convention for raw arrays (Ultralytics
            documents that as BGR channel order).

    Returns:
        A ``PIL.Image.Image`` with the detections drawn on it, or ``None``
        when ``image`` is ``None`` or when prediction fails (the error is
        printed rather than raised).
    """
    if image is None:
        return None

    try:
        # Hand the input to the model as-is. Ultralytics reads PIL images as
        # RGB but raw arrays as BGR, so converting a PIL image with
        # np.array() first would feed RGB data where BGR is expected.
        results = model(image, conf=0.35)

        # plot() returns a BGR array; reverse the channel axis so the PIL
        # image handed back to the UI is in RGB order.
        annotated_bgr = results[0].plot(labels=True)
        return Image.fromarray(annotated_bgr[..., ::-1])
    except Exception as e:
        # Best-effort UI callback: log the failure and show an empty output
        # instead of propagating the exception.
        print(f"Error during prediction: {e}")
        return None

# Make sure the "examples" folder exists. Attempt the creation directly and
# ignore "already exists" instead of checking first: the check-then-create
# form can fail if the path appears between the two calls.
try:
    os.makedirs("examples")
except FileExistsError:
    pass

# Build the Gradio UI: one image goes in, the annotated image comes out.
# Both components exchange PIL images with predict().
_input_image = gr.Image(type="pil")
_output_image = gr.Image(type="pil")

# Description text passed to the interface exactly as written here.
_description = """
    ## Document Layout Detection
    
    This model identifies various elements in document layouts including:
    - Text structures (TextColumns, Lists)
    - Semantic elements (Titles, Headers)
    - Typographical features (Bold, Italic)
    - Visual components (Figures, Tables)
    
    Upload an image of a document to analyze its layout structure.
    """

# No example images are listed yet; add paths such as
# "examples/example1.jpg" or "examples/example2.jpg" to this list.
_example_images = []

demo = gr.Interface(
    fn=predict,
    inputs=_input_image,
    outputs=_output_image,
    title="Document Layout Analysis with YOLOv8",
    description=_description,
    examples=_example_images,
)

# Start the app with default launch options when run as a script.
if __name__ == "__main__":
    demo.launch()