File size: 2,477 Bytes
0bbf6ef
 
1b3f90f
 
57d20a1
f496449
07f5bd9
aa8cd87
0bbf6ef
43d306c
e91a768
 
 
b1e4794
cff5fa2
1b3f90f
cff5fa2
 
 
 
 
 
e91a768
 
 
c65777e
1b3f90f
c65777e
 
e91a768
b296597
e8ad557
4504622
3cadd69
 
 
 
 
c65777e
1b3f90f
 
 
3cadd69
 
ec2e6e8
1b3f90f
cff5fa2
 
 
649e38b
1b3f90f
 
ff2c42f
 
 
649e38b
1b3f90f
e91a768
0bbf6ef
 
 
e91a768
 
 
e0a154b
e91a768
0bbf6ef
 
 
 
1b3f90f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import numpy as np
from pdf2image import convert_from_path
from PIL import Image
from ultralytics import YOLOv10

# Load the trained detection model once at import time from the local
# weights file "best.pt"; every inference call below reuses this instance.
model = YOLOv10("best.pt")

# Class indices (as reported by `box.cls`) that are kept as detections.
# NOTE(review): 3 and 4 presumably come from the label map the weights were
# trained with — confirm against the training configuration.
figure_class_index = 3  # detections with this class are treated as figures
table_class_index = 4   # detections with this class are treated as tables

# Run the detector on one image and collect the figure/table bounding boxes
def infer_image_and_get_boxes(image, confidence_threshold=0.6):
    """Return bounding boxes of confident figure and table detections.

    The image is converted to a NumPy array and passed to the module-level
    ``model``. A detection is kept when its class is one of
    ``figure_class_index`` / ``table_class_index`` and its confidence is
    strictly greater than ``confidence_threshold``.

    Args:
        image: Anything ``np.array`` can convert (a PIL image in this app).
        confidence_threshold: Exclusive lower bound on detection confidence.

    Returns:
        A list of ``(x1, y1, x2, y2)`` integer tuples, in the coordinate
        space of the image that was passed in.
    """
    wanted_classes = {figure_class_index, table_class_index}
    detections = []
    for result in model.predict(np.array(image)):
        for box in result.boxes:
            # Skip everything that is neither a figure nor a table.
            if int(box.cls[0]) not in wanted_classes:
                continue
            # Skip low-confidence hits (threshold itself is excluded).
            if not (box.conf[0] > confidence_threshold):
                continue
            corners = box.xyxy[0]
            detections.append(
                (int(corners[0]), int(corners[1]), int(corners[2]), int(corners[3]))
            )
    return detections

# Cut one sub-image per bounding box out of the given image
def crop_images_from_boxes(image, boxes, scale_factor):
    """Crop ``image`` once per box after scaling the box coordinates.

    Args:
        image: Object exposing ``crop((left, top, right, bottom))``.
        boxes: Iterable of ``(x1, y1, x2, y2)`` tuples.
        scale_factor: Multiplier applied to every coordinate before
            cropping; each product is truncated with ``int``.

    Returns:
        A list with the result of ``image.crop`` for each box, in order.
    """
    crops = []
    for left, top, right, bottom in boxes:
        scaled_box = (
            int(left * scale_factor),
            int(top * scale_factor),
            int(right * scale_factor),
            int(bottom * scale_factor),
        )
        crops.append(image.crop(scaled_box))
    return crops

# NOTE(review): `spaces` is not imported in the visible part of this file —
# confirm that `import spaces` exists, otherwise this decorator raises
# NameError when the module is loaded.
@spaces.GPU
def process_pdf(pdf_file):
    """Extract cropped figures and tables from every page of a PDF.

    Detection runs on a cheap low-DPI render of each page; only pages with
    at least one detection are re-rendered at high DPI, and the boxes are
    scaled up by the DPI ratio before cropping.

    Args:
        pdf_file: The uploaded PDF, either a filesystem path (``str``) or an
            object whose ``name`` attribute holds the path. ``None`` (nothing
            uploaded) is accepted.

    Returns:
        A list of cropped high-resolution page regions, in page order.
        Empty when no file was supplied or nothing was detected.
    """
    # Submitting the form without a file passes None; return an empty
    # result instead of failing on attribute access.
    if pdf_file is None:
        return []

    # Accept both a plain path string and a file wrapper exposing `.name`.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    all_cropped_images = []

    # Set the DPI for inference and high resolution for cropping
    low_dpi = 50
    high_dpi = 300

    # Boxes are found in low-DPI pixels; this maps them to high-DPI pixels.
    scale_factor = high_dpi / low_dpi

    # Convert PDF pages to images at low DPI
    low_res_images = convert_from_path(pdf_path, dpi=low_dpi)

    for page_index, low_res_img in enumerate(low_res_images):
        # Get bounding boxes from low DPI image
        boxes = infer_image_and_get_boxes(low_res_img)
        if not boxes:
            continue

        # Render only this page at high DPI (page numbers are 1-based), so
        # pages without detections never pay for the expensive render.
        page_number = page_index + 1
        high_res_img = convert_from_path(
            pdf_path,
            dpi=high_dpi,
            first_page=page_number,
            last_page=page_number,
        )[0]

        # Crop images at high DPI
        all_cropped_images.extend(
            crop_images_from_boxes(high_res_img, boxes, scale_factor)
        )

    return all_cropped_images

# Build the Gradio UI: one PDF upload in, a gallery of cropped regions out
pdf_upload = gr.File(label="Upload a PDF")
crops_gallery = gr.Gallery(label="Cropped Figures and Tables from PDF Pages")

iface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_upload,
    outputs=crops_gallery,
    title="Fast document layout analysis based on YOLOv10",
    description="Upload a PDF file to get cropped figures and tables from each page.",
)

# Start serving the interface
iface.launch()