Spaces:

ashen007
/

yolo-document-layout-demo

Running

App Files Files Community

yolo-document-layout-demo / app.py

ashen007

Update app.py

2693387 verified about 1 month ago

raw

history blame contribute delete

2.45 kB

	import gradio as gr
	from ultralytics import YOLO
	from PIL import Image
	from huggingface_hub import hf_hub_download
	import numpy as np
	import os

	# Load the YOLO model. First try handing the Hugging Face repo reference
	# straight to YOLO(); if that raises for any reason, download the checkpoint
	# file with hf_hub_download and load it from the returned local path.
	# Format for loading from Hugging Face: "hf://username/model_name"
	model_repo = "hf://ashen007/document-structure-detection"

	try:
	    model = YOLO(model_repo)
	    print(f"Successfully loaded model from {model_repo}")
	except Exception as e:
	    # Report why the direct load failed instead of discarding the error, so
	    # the logs show which loading path was actually taken.
	    print(f"Could not load model from {model_repo}: {e}")
	    model_path = hf_hub_download(
	        repo_id="ashen007/document-structure-detection",
	        filename="DSD-YOLOv8-v2.pt",
	    )
	    model = YOLO(model_path)
	    print(f"Successfully loaded model from {model_path}")

	# Label names for the document-layout detector, one per line.
	# NOTE(review): nothing in the code visible here reads this list; confirm it
	# matches the model's own label order before relying on it.
	class_names = [
	    "Author",
	    "Bigletter",
	    "Bleeding",
	    "Bold",
	    "Caption",
	    "Date",
	    "Figure",
	    "Footnote",
	    "Header",
	    "Italic",
	    "List",
	    "Map",
	    "SubSubTitle",
	    "SubTitle",
	    "Table",
	    "TextColumn",
	    "Title",
	    "Underline",
	    "equations",
	]

	def predict(image):
	    """
	    Run layout detection on an image and return the annotated result.

	    Args:
	        image: A PIL image, or None when nothing was supplied. Any other
	            object (e.g. a NumPy array) is passed to the model unchanged;
	            Ultralytics interprets NumPy arrays as BGR.

	    Returns:
	        A PIL image (RGB) with the detections drawn on it, or None when no
	        image was given or prediction failed (the error is printed).
	    """
	    if image is None:
	        return None

	    try:
	        if isinstance(image, Image.Image):
	            # Ultralytics treats NumPy input as BGR, so an RGB array built
	            # from a PIL image would be inferred with red/blue swapped.
	            # Pass the PIL image itself; normalising to RGB first also
	            # covers RGBA, grayscale and palette uploads.
	            source = image.convert("RGB")
	        else:
	            source = image

	        # Run prediction with the same confidence threshold as before.
	        results = model(source, conf=0.35)

	        # plot() returns a BGR array; reverse the channel axis so PIL
	        # receives RGB data.
	        annotated_bgr = results[0].plot(labels=True)
	        return Image.fromarray(np.ascontiguousarray(annotated_bgr[..., ::-1]))
	    except Exception as e:
	        # Best-effort UI callback: report the failure and show no output
	        # rather than crashing the app.
	        print(f"Error during prediction: {e}")
	        return None

	# Make sure the examples folder exists. exist_ok=True makes this one call,
	# with no gap between checking for the folder and creating it. If "examples"
	# exists but is not a directory, os.makedirs raises FileExistsError.
	os.makedirs("examples", exist_ok=True)

	# Create the Gradio interface: a single image input wired to predict(),
	# with a single image output for the annotated result.
	demo = gr.Interface(
	# Callback invoked with the input image; its return value fills the output.
	fn=predict,
	# Both image components are configured with type="pil".
	inputs=gr.Image(type="pil"),
	outputs=gr.Image(type="pil"),
	title="Document Layout Analysis with YOLOv8",
	# Markdown text passed as the interface description.
	description="""
	## Document Layout Detection

	This model identifies various elements in document layouts including:
	- Text structures (TextColumns, Lists)
	- Semantic elements (Titles, Headers)
	- Typographical features (Bold, Italic)
	- Visual components (Figures, Tables)

	Upload an image of a document to analyze its layout structure.
	""",
	# The examples list is currently empty; the entries below are
	# commented-out placeholders.
	examples=[
	# Add paths to example images here
	# "examples/example1.jpg",
	# "examples/example2.jpg"
	]
	)

	# Start the Gradio app only when this file is run as a script; launch() is
	# called with its default settings.
	if __name__ == "__main__":
	    demo.launch()