Spaces:
Sleeping
Sleeping
File size: 2,490 Bytes
0bbf6ef 1b3f90f 57d20a1 4da5a4d 07f5bd9 aa8cd87 0bbf6ef 43d306c e91a768 b1e4794 cff5fa2 1b3f90f cff5fa2 e91a768 c65777e 1b3f90f c65777e e91a768 b296597 e8ad557 4504622 3cadd69 c65777e 1b3f90f 3cadd69 ec2e6e8 1b3f90f cff5fa2 649e38b 1b3f90f ff2c42f 649e38b 1b3f90f e91a768 0bbf6ef e91a768 e0a154b e91a768 0bbf6ef 1b3f90f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import gradio as gr
import numpy as np
from pdf2image import convert_from_path
from PIL import Image
from ultralytics import YOLOv10
import spaces
# Load the trained YOLOv10 weights from "best.pt" once, at import time.
# The resulting module-level `model` is the object inference is run on.
model = YOLOv10("best.pt")
# Class indices of the detections that are kept; detections of any other
# class are discarded by infer_image_and_get_boxes.
# NOTE(review): presumably these follow the label order "best.pt" was trained
# with -- confirm against the training configuration.
figure_class_index = 3 # detections with this class index are treated as figures
table_class_index = 4 # detections with this class index are treated as tables
# Function to perform inference on an image and return bounding boxes for figures and tables
def infer_image_and_get_boxes(image, confidence_threshold=0.6):
    """Detect figures and tables in a page image and return their boxes.

    Args:
        image: Page image to analyse. A PIL image is handed to the model
            as-is; any other input is converted with ``np.array`` first.
        confidence_threshold: A detection is kept only when its confidence
            is strictly greater than this value.

    Returns:
        A list of ``(x1, y1, x2, y2)`` integer tuples, one per kept
        detection, in the pixel coordinates of ``image``.
    """
    # A PIL image is RGB, but the predictor interprets a bare ndarray as BGR.
    # Converting a PIL page with np.array() therefore swapped the red and blue
    # channels at inference time; passing the PIL image through lets the
    # predictor apply the correct channel handling itself.
    if isinstance(image, Image.Image):
        source = image
    else:
        source = np.array(image)

    wanted_classes = {figure_class_index, table_class_index}

    boxes = []
    for result in model.predict(source):
        for box in result.boxes:
            if int(box.cls[0]) in wanted_classes and box.conf[0] > confidence_threshold:
                corners = box.xyxy[0]
                boxes.append(
                    (int(corners[0]), int(corners[1]), int(corners[2]), int(corners[3]))
                )
    return boxes
# Function to crop images from the boxes
def crop_images_from_boxes(image, boxes, scale_factor):
    """Cut one region out of ``image`` for every box, after rescaling the box.

    Args:
        image: Object exposing ``crop(box)``; called once per box.
        boxes: Iterable of ``(x1, y1, x2, y2)`` coordinates.
        scale_factor: Multiplier applied to every coordinate before cropping.

    Returns:
        A list with the result of each ``image.crop`` call, in box order.
    """

    def _rescale(box):
        # Each coordinate is scaled and then truncated towards zero by int().
        left, top, right, bottom = box
        return (
            int(left * scale_factor),
            int(top * scale_factor),
            int(right * scale_factor),
            int(bottom * scale_factor),
        )

    return [image.crop(_rescale(box)) for box in boxes]
@spaces.GPU
def process_pdf(pdf_file):
    """Return cropped images of every figure and table detected in a PDF.

    Each page is rendered at a low DPI for detection. Only pages with at
    least one detection are rendered again at a high DPI, and the crops are
    taken from that high-resolution render.

    Args:
        pdf_file: The uploaded PDF, either as a path string or as an object
            whose ``name`` attribute holds the path. ``None`` (nothing
            uploaded) is accepted.

    Returns:
        A list of cropped images, in page order; empty when no file was
        given or nothing was detected.
    """
    # Submitting the form without a file passes None; return an empty
    # gallery instead of failing on the attribute access below.
    if pdf_file is None:
        return []

    # The upload may arrive as a plain path string or as a file-like wrapper
    # exposing the path through `.name`; support both.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Detection runs on a cheap low-DPI render; crops come from a high-DPI one.
    low_dpi = 50
    high_dpi = 300
    # Boxes are found in low-DPI pixels and must be scaled to high-DPI pixels.
    scale_factor = high_dpi / low_dpi

    all_cropped_images = []
    low_res_images = convert_from_path(pdf_path, dpi=low_dpi)
    for page_number, low_res_img in enumerate(low_res_images, start=1):
        boxes = infer_image_and_get_boxes(low_res_img)
        if not boxes:
            # Skip the expensive high-DPI render for pages with no detections.
            continue
        # first_page/last_page are 1-based, hence enumerate(start=1).
        high_res_img = convert_from_path(
            pdf_path,
            dpi=high_dpi,
            first_page=page_number,
            last_page=page_number,
        )[0]
        all_cropped_images.extend(
            crop_images_from_boxes(high_res_img, boxes, scale_factor)
        )
    return all_cropped_images
# Assemble the web UI: a PDF upload feeds process_pdf and the resulting crops
# are shown in a gallery.
pdf_upload = gr.File(label="Upload a PDF")
crops_gallery = gr.Gallery(label="Cropped Figures and Tables from PDF Pages")
iface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_upload,
    outputs=crops_gallery,
    title="Fast document layout analysis based on YOLOv10",
    description="Upload a PDF file to get cropped figures and tables from each page.",
)
# Start serving the app
iface.launch()
|