|
import gradio as gr |
|
from ultralytics import YOLO |
|
from PIL import Image |
|
from huggingface_hub import hf_hub_download |
|
import numpy as np |
|
import os |
|
|
|
|
|
# Load the detection model once, at import time.
#
# The direct "hf://" load is attempted first.  If that raises for any reason,
# the checkpoint file is downloaded explicitly from the same Hugging Face
# repository and the model is built from the downloaded local path instead.
model_repo = "hf://ashen007/document-structure-detection"

try:
    model = YOLO(model_repo)
    print(f"Successfully loaded model from {model_repo}")
except Exception as e:
    # Deliberately broad: this is a best-effort fallback, so any failure of
    # the first attempt is tolerated -- but say why it failed rather than
    # discarding the exception silently.
    print(f"Could not load model from {model_repo}: {e}")
    model_path = hf_hub_download(
        repo_id="ashen007/document-structure-detection",
        filename="DSD-YOLOv8-v2.pt",
    )
    model = YOLO(model_path)
|
|
|
|
|
# Label names for the document-layout classes.
# NOTE(review): presumably ordered to match the model's class indices --
# confirm against the checkpoint's own names before relying on positions.
class_names = [
    "Author",
    "Bigletter",
    "Bleeding",
    "Bold",
    "Caption",
    "Date",
    "Figure",
    "Footnote",
    "Header",
    "Italic",
    "List",
    "Map",
    "SubSubTitle",
    "SubTitle",
    "Table",
    "TextColumn",
    "Title",
    "Underline",
    "equations",
]
|
|
|
def predict(image):
    """
    Run layout detection on the input image and return the annotated image.

    Args:
        image: A ``PIL.Image.Image`` (what the ``type="pil"`` Gradio input
            passes in), an array-like image, or ``None``.  Array input is
            interpreted as RGB, consistent with the RGB PIL image returned.

    Returns:
        A ``PIL.Image.Image`` with the detections drawn on it, or ``None``
        when no image was supplied or when prediction failed (the error is
        printed in that case).
    """
    if image is None:
        return None

    try:
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.asarray(image))

        # Hand the model a plain RGB PIL image.  Ultralytics treats PIL input
        # as RGB but ndarray input as BGR, so converting to an ndarray first
        # would make the model see red and blue swapped.  convert("RGB") also
        # normalises RGBA / paletted / grayscale uploads to three channels.
        rgb_image = image.convert("RGB")

        results = model(rgb_image, conf=0.35)

        # Results.plot() returns a BGR-order array; reverse the channel axis
        # so the PIL image built from it has correct RGB colours.
        annotated_bgr = results[0].plot(labels=True)
        return Image.fromarray(annotated_bgr[..., ::-1])
    except Exception as e:
        # Boundary of the UI callback: report the failure and show no output
        # instead of propagating the exception.
        print(f"Error during prediction: {e}")
        return None
|
|
|
|
|
# Ensure the local "examples" directory exists.  exist_ok=True makes this a
# no-op when the directory is already there and removes the window between a
# separate existence check and the create call.
os.makedirs("examples", exist_ok=True)
|
|
|
|
|
# Gradio interface wiring: one PIL image in, one PIL image out, with
# predict() as the callback.  The examples list is intentionally left empty.
demo = gr.Interface(
    title="Document Layout Analysis with YOLOv8",
    description="""
## Document Layout Detection

This model identifies various elements in document layouts including:
- Text structures (TextColumns, Lists)
- Semantic elements (Titles, Headers)
- Typographical features (Bold, Italic)
- Visual components (Figures, Tables)

Upload an image of a document to analyze its layout structure.
""",
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    examples=[],
)
|
|
|
|
|
# Launch the interface only when this file is executed as a script, so that
# importing the module does not call demo.launch().
if __name__ == "__main__":
    demo.launch()