KingNish committed on
Commit 6bf8982 · verified · 1 Parent(s): 1ac43cd

Update app.py

Files changed (1): app.py +83 -11
app.py CHANGED
@@ -16,18 +16,34 @@ import os
 # "Qwen/Qwen2-VL-7B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
 
 # }
-def array_to_image_path(image_array):
-    if image_array is None:
+def array_to_image_path(image):
+    if image is None:
+        gr.Warning("No image provided. Please upload an image before submitting.")
         raise ValueError("No image provided. Please upload an image before submitting.")
-    # Convert numpy array to PIL Image
-    img = Image.fromarray(np.uint8(image_array))
 
     # Generate a unique filename using timestamp
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     filename = f"image_{timestamp}.png"
 
     # Save the image
-    img.save(filename)
+    image.save(filename)
+
+    # Get the full path of the saved image
+    full_path = os.path.abspath(filename)
+
+    return full_path
+
+def array_to_video_path(video):
+    if video is None:
+        gr.Warning("No video provided. Please upload a video before submitting.")
+        raise ValueError("No video provided. Please upload a video before submitting.")
+
+    # Generate a unique filename using timestamp
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"video_{timestamp}.mp4"
+
+    # gr.Video yields a file path, so copy the clip to the new name
+    shutil.copy(video, filename)  # assumes `import shutil` at the top of app.py
 
     # Get the full path of the saved image
     full_path = os.path.abspath(filename)
@@ -53,15 +69,12 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
+def qwen_image(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
     image_path = array_to_image_path(image)
 
     print(image_path)
     model = models[model_id]
     processor = processors[model_id]
-
-    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
-    image = Image.fromarray(image).convert("RGB")
     messages = [
         {
             "role": "user",
@@ -100,6 +113,54 @@ def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
 
     return output_text[0]
 
+@spaces.GPU(duration=125)
+def qwen_video(video, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
+    video_path = array_to_video_path(video)
+
+    print(video_path)
+    model = models[model_id]
+    processor = processors[model_id]
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "video",
+                    "video": video_path,
+                    "max_pixels": 360 * 420,
+                    "fps": 6.0,
+                },
+                {"type": "text", "text": text_input},
+            ],
+        }
+    ]
+
+    # Preparation for inference
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+
+    # Inference: Generation of the output
+    generated_ids = model.generate(**inputs, max_new_tokens=1024)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+
+    return output_text[0]
+
 css = """
 #output {
     height: 500px;
@@ -113,14 +174,25 @@ with gr.Blocks(css=css) as demo:
     with gr.Tab(label="Qwen2-VL-7B Input"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture")
+                input_img = gr.Image(label="Input Picture", type="pil")
+                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-7B-Instruct")
+                text_input = gr.Textbox(label="Question")
+                submit_btn = gr.Button(value="Submit")
+            with gr.Column():
+                output_text = gr.Textbox(label="Output Text")
+
+        submit_btn.click(qwen_image, [input_img, text_input, model_selector], [output_text])
+    with gr.Tab(label="Qwen2-VL-7B Video Input"):
+        with gr.Row():
+            with gr.Column():
+                input_img = gr.Video(label="Input Video")
                 model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-7B-Instruct")
                 text_input = gr.Textbox(label="Question")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 output_text = gr.Textbox(label="Output Text")
 
-        submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
+        submit_btn.click(qwen_video, [input_img, text_input, model_selector], [output_text])
 
 demo.queue(api_open=False)
 demo.launch(debug=True)
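
Note: the qwen_video function added above follows the standard Qwen2-VL video recipe (chat template → process_vision_info → processor → generate). A minimal standalone sketch of the same flow outside Gradio, assuming the qwen-vl-utils package is installed; the checkpoint, the clip path sample.mp4, and the question are placeholders:

from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Load model and processor once (the Space instead keeps these in its
# `models` / `processors` dicts)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

# Same message shape qwen_video() builds: a video entry carrying
# max_pixels / fps sampling hints, followed by the text question
messages = [{
    "role": "user",
    "content": [
        {"type": "video", "video": "sample.mp4", "max_pixels": 360 * 420, "fps": 6.0},
        {"type": "text", "text": "Describe this video."},
    ],
}]

text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)  # decodes and samples the clip
inputs = processor(
    text=[text], images=image_inputs, videos=video_inputs,
    padding=True, return_tensors="pt",
).to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=1024)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])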