Tonic committed on
Commit 4de8e25 · unverified · 1 Parent(s): b5deb5f

add bounding boxes

Files changed (1)
  1. app.py +72 -11
app.py CHANGED
@@ -5,6 +5,12 @@ import requests
 from transformers import AutoProcessor
 from modeling_florence2 import Florence2ForConditionalGeneration
 from configuration_florence2 import Florence2Config
+import io
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import numpy as np
+import random
+import copy

 # Initialize model and processor
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -25,6 +31,40 @@ TASK_PROMPTS = {
     "Region Proposal": "<REGION_PROPOSAL>"
 }

+colormap = ['blue','orange','green','purple','brown','pink','gray','olive','cyan','red',
+            'lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
+
+def fig_to_pil(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png')
+    buf.seek(0)
+    return Image.open(buf)
+
+def plot_bbox(image, data):
+    fig, ax = plt.subplots()
+    ax.imshow(image)
+    for bbox, label in zip(data['bboxes'], data['labels']):
+        x1, y1, x2, y2 = bbox
+        rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='r', facecolor='none')
+        ax.add_patch(rect)
+        plt.text(x1, y1, label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))
+    ax.axis('off')
+    return fig
+
+def draw_ocr_bboxes(image, prediction):
+    scale = 1
+    draw = ImageDraw.Draw(image)
+    bboxes, labels = prediction['quad_boxes'], prediction['labels']
+    for box, label in zip(bboxes, labels):
+        color = random.choice(colormap)
+        new_box = (np.array(box) * scale).tolist()
+        draw.polygon(new_box, width=3, outline=color)
+        draw.text((new_box[0]+8, new_box[1]+2),
+                  "{}".format(label),
+                  align="right",
+                  fill=color)
+    return image
+
 def process_image(image, task):
     prompt = TASK_PROMPTS[task]
     inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
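The new helpers consume the dict that `processor.post_process_generation` returns for region tasks: parallel `bboxes` (each `[x1, y1, x2, y2]` in pixel coordinates) and `labels` lists, or `quad_boxes` polygons for OCR regions. A minimal sketch of how `plot_bbox` and `fig_to_pil` could be exercised outside the app with a hand-built result of that shape (the image path, coordinates, and labels below are placeholders):

from PIL import Image

# Placeholder detection result mirroring the {'bboxes': ..., 'labels': ...}
# structure plot_bbox expects; values are made up for illustration.
sample_result = {
    "bboxes": [[30.0, 40.0, 210.0, 180.0], [250.0, 60.0, 400.0, 300.0]],
    "labels": ["cat", "dog"],
}

image = Image.open("example.jpg")        # any local test image (placeholder path)
fig = plot_bbox(image, sample_result)    # matplotlib figure with rectangles + labels
annotated = fig_to_pil(fig)              # round-trip the figure back to a PIL image
annotated.save("annotated.png")

`draw_ocr_bboxes` works the same way on a `quad_boxes`/`labels` pair, drawing each polygon directly onto a copy of the PIL image.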
 
@@ -39,19 +79,40 @@ def process_image(image, task):

     parsed_answer = processor.post_process_generation(generated_text, task=prompt, image_size=(image.width, image.height))

-    return str(parsed_answer)
+    return parsed_answer
+
+def main_process(image, task):
+    result = process_image(image, task)
+
+    if task in ["Object Detection", "Dense Region Caption", "Region Proposal"]:
+        fig = plot_bbox(image, result[TASK_PROMPTS[task]])
+        output_image = fig_to_pil(fig)
+    elif task == "OCR with Region":
+        output_image = draw_ocr_bboxes(image.copy(), result[TASK_PROMPTS[task]])
+    else:
+        output_image = None
+
+    return {task: str(result)}, output_image

 # Define Gradio interface
-iface = gr.Interface(
-    fn=process_image,
-    inputs=[
-        gr.Image(type="pil"),
-        gr.Dropdown(list(TASK_PROMPTS.keys()), label="Task")
-    ],
-    outputs=gr.Textbox(label="Result"),
-    title="Florence-2 Demo",
-    description="Upload an image and select a task to process with Florence-2."
-)
+with gr.Blocks(title="Florence-2 Demo") as iface:
+    gr.Markdown("# Florence-2 Demo")
+    gr.Markdown("Upload an image and select a task to process with Florence-2.")
+
+    with gr.Row():
+        image_input = gr.Image(type="pil", label="Input Image")
+        task_dropdown = gr.Dropdown(list(TASK_PROMPTS.keys()), label="Task")
+
+    submit_button = gr.Button("Process")
+
+    output_text = gr.JSON(label="Output")
+    output_image = gr.Image(label="Processed Image")
+
+    submit_button.click(
+        fn=main_process,
+        inputs=[image_input, task_dropdown],
+        outputs=[output_text, output_image]
+    )

 # Launch the interface
 iface.launch()
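Since `main_process` is now the single entry point wired to the Blocks UI, it can also be smoke-tested directly. A sketch assuming the model and processor are already initialized as in app.py, with a placeholder image path:

from PIL import Image

image = Image.open("test.jpg")                    # placeholder test image
result_json, overlay = main_process(image, "Object Detection")

print(result_json)          # {"Object Detection": "<stringified parsed answer>"}
if overlay is not None:     # None for tasks that return text only
    overlay.save("object_detection_overlay.png")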