Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
import numpy as np
|
4 |
import fitz # PyMuPDF
|
@@ -6,27 +5,26 @@ import spaces
|
|
6 |
from ultralytics import YOLOv10
|
7 |
|
8 |
# Load the trained model
|
9 |
-
|
10 |
model = YOLOv10("best.pt")
|
11 |
|
12 |
-
|
13 |
# Define the class indices for figures and tables
|
14 |
figure_class_index = 3 # class index for figures
|
15 |
table_class_index = 4 # class index for tables
|
16 |
|
17 |
-
# Function to perform inference on
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
27 |
|
28 |
# Function to crop images from the boxes
|
29 |
-
|
30 |
def crop_images_from_boxes(image, boxes, scale_factor):
|
31 |
cropped_images = [
|
32 |
image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
|
@@ -49,14 +47,18 @@ def process_pdf(pdf_file):
|
|
49 |
|
50 |
# Pre-cache all page pixmaps at low DPI
|
51 |
low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
boxes = infer_image_and_get_boxes(low_res_img)
|
59 |
-
|
60 |
if boxes:
|
61 |
# Load high DPI image for cropping only if boxes are found
|
62 |
high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
|
@@ -80,3 +82,4 @@ iface = gr.Interface(
|
|
80 |
# Launch the app
|
81 |
iface.launch()
|
82 |
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import fitz # PyMuPDF
|
|
|
5 |
from ultralytics import YOLOv10
|
6 |
|
7 |
# Load the trained model
|
|
|
8 |
model = YOLOv10("best.pt")
|
9 |
|
|
|
10 |
# Define the class indices for figures and tables
|
11 |
figure_class_index = 3 # class index for figures
|
12 |
table_class_index = 4 # class index for tables
|
13 |
|
14 |
+
# Function to perform inference on a batch of images and return bounding boxes for figures and tables
|
15 |
+
def infer_images_and_get_boxes(images, confidence_threshold=0.6):
    """Run the detector on a batch of images and keep figure/table boxes.

    Args:
        images: Batch of images forwarded unchanged to ``model.predict``
            (the caller passes a list with one array per PDF page).
        confidence_threshold: A detection is kept only when its confidence
            is strictly greater than this value.

    Returns:
        A list with one entry per prediction result, in the order returned
        by ``model.predict``. Each entry is a list of ``(x1, y1, x2, y2)``
        integer tuples taken from ``box.xyxy`` for detections whose class is
        ``figure_class_index`` or ``table_class_index``.
    """
    # An empty batch has nothing to detect; return early instead of handing
    # an empty source list to the model.
    if isinstance(images, (list, tuple)) and not images:
        return []

    # Build the set of wanted classes once rather than once per box.
    wanted_classes = {figure_class_index, table_class_index}

    all_boxes = []
    for result in model.predict(images):
        page_boxes = []
        for box in result.boxes:
            if int(box.cls[0]) not in wanted_classes:
                continue
            if not box.conf[0] > confidence_threshold:
                continue
            # One conversion for all four coordinates instead of four
            # separate element reads; int() truncates exactly as before.
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            page_boxes.append((int(x1), int(y1), int(x2), int(y2)))
        all_boxes.append(page_boxes)
    return all_boxes
|
26 |
|
27 |
# Function to crop images from the boxes
|
|
|
28 |
def crop_images_from_boxes(image, boxes, scale_factor):
|
29 |
cropped_images = [
|
30 |
image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
|
|
|
47 |
|
48 |
# Pre-cache all page pixmaps at low DPI
|
49 |
low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
|
50 |
+
|
51 |
+
# Prepare a batch of low resolution images for inference
|
52 |
+
low_res_imgs = [
|
53 |
+
np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
|
54 |
+
for pix in low_res_pixmaps
|
55 |
+
]
|
56 |
|
57 |
+
# Run inference on the batch of low resolution images
|
58 |
+
all_boxes = infer_images_and_get_boxes(low_res_imgs)
|
59 |
+
|
60 |
+
# Loop through each page and corresponding boxes
|
61 |
+
for page_num, (low_res_img, boxes) in enumerate(zip(low_res_imgs, all_boxes)):
|
|
|
|
|
62 |
if boxes:
|
63 |
# Load high DPI image for cropping only if boxes are found
|
64 |
high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
|
|
|
82 |
# Launch the app
|
83 |
iface.launch()
|
84 |
|
85 |
+
|