Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Files Community

zliang committed on May 31, 2024

Commit

9b47e37

verified ·

1 Parent(s): 82d9e66

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -21

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import gradio as gr
 import numpy as np
 import fitz  # PyMuPDF
@@ -6,27 +5,26 @@ import spaces
 from ultralytics import YOLOv10
 # Load the trained model
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
-# Run the model on one image and return a single flat list of integer 4-tuples taken from box.xyxy[0], keeping only detections whose class is the figure or table index and whose confidence exceeds confidence_threshold
-def infer_image_and_get_boxes(image, confidence_threshold=0.6):
-    results = model.predict(image)  # may yield several result objects; they are flattened below
-    boxes = [
-        (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))  # four coordinates truncated to int; presumably (x1, y1, x2, y2) - confirm
-        for result in results for box in result.boxes  # boxes from every result end up in the same list
-        if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold  # strict '>' comparison: a confidence equal to the threshold is dropped
-    ]
-    return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     cropped_images = [
         image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
@@ -49,14 +47,18 @@ def process_pdf(pdf_file):
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
-    # Loop through each page
-    for page_num, low_res_pix in enumerate(low_res_pixmaps):
-        low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
-        # Get bounding boxes from low DPI image
-        boxes = infer_image_and_get_boxes(low_res_img)
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
@@ -80,3 +82,4 @@ iface = gr.Interface(
 # Launch the app
 iface.launch()

 import gradio as gr
 import numpy as np
 import fitz  # PyMuPDF
 from ultralytics import YOLOv10
 # Load the trained model
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
+# Run the model once on a batch of images and return a list with one entry per result; each entry is a list of integer 4-tuples taken from box.xyxy[0] for detections whose class is the figure or table index and whose confidence exceeds confidence_threshold
+def infer_images_and_get_boxes(images, confidence_threshold=0.6):
+    results = model.predict(images)  # single predict call for the whole batch; presumably one result per input image - confirm
+    all_boxes = []
+    for result in results:
+        boxes = [
+            (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))  # four coordinates truncated to int; presumably (x1, y1, x2, y2) - confirm
+            for box in result.boxes
+            if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold  # strict '>' comparison: a confidence equal to the threshold is dropped
+        ]
+        all_boxes.append(boxes)  # appended even when empty, so positions in all_boxes line up with positions in results
+    return all_boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     cropped_images = [
         image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
+    # Prepare a batch of low resolution images for inference
+    low_res_imgs = [
+        np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
+        for pix in low_res_pixmaps
+    ]
+    # Run inference on the batch of low resolution images
+    all_boxes = infer_images_and_get_boxes(low_res_imgs)
+    # Loop through each page and corresponding boxes
+    for page_num, (low_res_img, boxes) in enumerate(zip(low_res_imgs, all_boxes)):
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
 # Launch the app
 iface.launch()