zliang committed on
Commit
9b47e37
·
verified ·
1 Parent(s): 82d9e66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -21
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import gradio as gr
3
  import numpy as np
4
  import fitz # PyMuPDF
@@ -6,27 +5,26 @@ import spaces
6
  from ultralytics import YOLOv10
7
 
8
  # Load the trained model
9
-
10
  model = YOLOv10("best.pt")
11
 
12
-
13
  # Define the class indices for figures and tables
14
  figure_class_index = 3 # class index for figures
15
  table_class_index = 4 # class index for tables
16
 
17
- # Function to perform inference on an image and return bounding boxes for figures and tables
18
-
19
- def infer_image_and_get_boxes(image, confidence_threshold=0.6):
20
- results = model.predict(image)
21
- boxes = [
22
- (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
23
- for result in results for box in result.boxes
24
- if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
25
- ]
26
- return boxes
 
 
27
 
28
  # Function to crop images from the boxes
29
-
30
  def crop_images_from_boxes(image, boxes, scale_factor):
31
  cropped_images = [
32
  image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
@@ -49,14 +47,18 @@ def process_pdf(pdf_file):
49
 
50
  # Pre-cache all page pixmaps at low DPI
51
  low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
 
 
 
 
 
 
52
 
53
- # Loop through each page
54
- for page_num, low_res_pix in enumerate(low_res_pixmaps):
55
- low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
56
-
57
- # Get bounding boxes from low DPI image
58
- boxes = infer_image_and_get_boxes(low_res_img)
59
-
60
  if boxes:
61
  # Load high DPI image for cropping only if boxes are found
62
  high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
@@ -80,3 +82,4 @@ iface = gr.Interface(
80
  # Launch the app
81
  iface.launch()
82
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import fitz # PyMuPDF
 
5
  from ultralytics import YOLOv10
6
 
7
  # Load the trained model
 
8
  model = YOLOv10("best.pt")
9
 
 
10
  # Define the class indices for figures and tables
11
  figure_class_index = 3 # class index for figures
12
  table_class_index = 4 # class index for tables
13
 
14
# Function to perform inference on a batch of images and return bounding boxes for figures and tables
def infer_images_and_get_boxes(images, confidence_threshold=0.6):
    """Run the detector on a batch of images and keep figure/table boxes.

    Args:
        images: Batch of images handed unchanged to ``model.predict``.
        confidence_threshold: A detection is kept only when its confidence
            is strictly greater than this value.

    Returns:
        A list with one entry per result returned by ``model.predict``.
        Each entry is a list of ``(x1, y1, x2, y2)`` tuples of ints taken
        from ``box.xyxy`` for boxes whose class is the figure or table
        index. An empty list/tuple of images yields ``[]``.
    """
    # An empty batch has nothing to detect; skip the model call entirely.
    if isinstance(images, (list, tuple)) and not images:
        return []

    # Build the class filter once instead of once per candidate box.
    wanted_classes = {figure_class_index, table_class_index}

    all_boxes = []
    for result in model.predict(images):
        page_boxes = []
        for box in result.boxes:
            if int(box.cls[0]) not in wanted_classes:
                continue
            if not box.conf[0] > confidence_threshold:
                continue
            # Index the coordinate row once, then truncate each value to int.
            x1, y1, x2, y2 = box.xyxy[0]
            page_boxes.append((int(x1), int(y1), int(x2), int(y2)))
        all_boxes.append(page_boxes)
    return all_boxes
26
 
27
  # Function to crop images from the boxes
 
28
  def crop_images_from_boxes(image, boxes, scale_factor):
29
  cropped_images = [
30
  image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
 
47
 
48
  # Pre-cache all page pixmaps at low DPI
49
  low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
50
+
51
+ # Prepare a batch of low resolution images for inference
52
+ low_res_imgs = [
53
+ np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
54
+ for pix in low_res_pixmaps
55
+ ]
56
 
57
+ # Run inference on the batch of low resolution images
58
+ all_boxes = infer_images_and_get_boxes(low_res_imgs)
59
+
60
+ # Loop through each page and corresponding boxes
61
+ for page_num, (low_res_img, boxes) in enumerate(zip(low_res_imgs, all_boxes)):
 
 
62
  if boxes:
63
  # Load high DPI image for cropping only if boxes are found
64
  high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
 
82
  # Launch the app
83
  iface.launch()
84
 
85
+