# File size: 2,449 Bytes
# 6414d1d 1dac99a 6414d1d 1dac99a 6414d1d 1dac99a 6414d1d 2693387 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
from ultralytics import YOLO
from PIL import Image
from huggingface_hub import hf_hub_download
import numpy as np
import os
# Load the YOLO model once at import time so every request reuses it.
# The "hf://username/model_name" shortcut is tried first; if that raises for
# any reason, the named weights file is downloaded from the Hugging Face Hub
# and loaded from the returned local path instead.
model_repo = "hf://ashen007/document-structure-detection"
try:
    model = YOLO(model_repo)
    print(f"Successfully loaded model from {model_repo}")
except Exception as e:
    # Report why the shortcut failed instead of silently discarding the error,
    # so a broken fallback can be told apart from a broken primary path.
    print(f"Could not load model from {model_repo} ({e}); downloading weights instead")
    model_path = hf_hub_download(
        repo_id="ashen007/document-structure-detection",
        filename="DSD-YOLOv8-v2.pt",
    )
    model = YOLO(model_path)
    print(f"Successfully loaded model from {model_path}")
# Labels for the document elements the detector is meant to recognise.
# NOTE(review): presumably listed in the model's class-index order — confirm
# against the weights before indexing into this list.
class_names = [
    "Author",
    "Bigletter",
    "Bleeding",
    "Bold",
    "Caption",
    "Date",
    "Figure",
    "Footnote",
    "Header",
    "Italic",
    "List",
    "Map",
    "SubSubTitle",
    "SubTitle",
    "Table",
    "TextColumn",
    "Title",
    "Underline",
    "equations",
]
def predict(image):
    """
    Run layout detection on a document image and return the annotated result.

    Args:
        image: The uploaded document, either a PIL image (what the Gradio
            input delivers with type="pil") or an RGB numpy array. None when
            nothing was uploaded.

    Returns:
        A PIL image with the detections drawn on it, or None when no image
        was supplied or prediction failed (the error is printed).
    """
    if image is None:
        return None
    try:
        # Give the model a PIL image rather than a raw array: Ultralytics
        # interprets numpy input as BGR (OpenCV order), so an RGB array would
        # be run through the network with red and blue swapped.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # Normalise alpha, palette and grayscale uploads to three channels.
        image = image.convert("RGB")
        # Run prediction, keeping detections with confidence >= 0.35
        results = model(image, conf=0.35)
        # plot() yields a BGR array; reverse the channel axis so PIL gets RGB.
        annotated_bgr = results[0].plot(labels=True)
        return Image.fromarray(annotated_bgr[..., ::-1])
    except Exception as e:
        # Best effort for the UI: log the failure and show an empty output.
        print(f"Error during prediction: {e}")
        return None
# Make sure the examples folder exists. exist_ok=True replaces the separate
# existence check, avoiding the check-then-create race and making repeated
# start-ups a no-op.
os.makedirs("examples", exist_ok=True)
# Assemble the web UI: a single image upload wired to predict(), with the
# annotated image shown as the only output. Both sides exchange PIL images.
demo = gr.Interface(
    title="Document Layout Analysis with YOLOv8",
    description="""
## Document Layout Detection
This model identifies various elements in document layouts including:
- Text structures (TextColumns, Lists)
- Semantic elements (Titles, Headers)
- Typographical features (Bold, Italic)
- Visual components (Figures, Tables)
Upload an image of a document to analyze its layout structure.
""",
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    examples=[
        # No bundled examples yet; list image paths here to enable them:
        # "examples/example1.jpg",
        # "examples/example2.jpg"
    ],
)
# Launch the app - settings for Hugging Face Spaces.
# Guarded so that importing this module does not start the server.
if __name__ == "__main__":
    demo.launch()