preston-cell committed
Commit 40c03fb · verified · 1 Parent(s): e623f79

Update app.py

Files changed (1):
  app.py +10 -41
app.py CHANGED
@@ -1,49 +1,18 @@
  import gradio as gr
  from transformers import AutoProcessor, AutoModelForImageTextToText
+ import torch

- processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
- model = AutoModelForImageTextToText.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+ model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
+ processor = AutoProcessor.from_pretrained(model_path)
+ model = AutoModelForImageTextToText.from_pretrained(
+     model_path,
+     torch_dtype=torch.bfloat16,
+     _attn_implementation="flash_attention_2"
+ ).to("cuda")

  def launch(input):
-     messages = [
-         {
-             "role": "user",
-             "content": [
-                 {
-                     "type": "image",
-                     "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
-                 },
-                 {
-                     "type": "text", "text": "Describe this image."
-                 },
-             ],
-         }
-     ]
-
-     # Preparation for inference
-     text = processor.apply_chat_template(
-         messages, tokenize=False, add_generation_prompt=True
-     )
-     image_inputs, video_inputs = process_vision_info(messages)
-     inputs = processor(
-         text=[text],
-         images=image_inputs,
-         videos=video_inputs,
-         padding=True,
-         return_tensors="pt",
-     )
-     inputs = inputs.to("cuda")
-
-     # Inference: Generation of the output
-     generated_ids = model.generate(**inputs, max_new_tokens=128)
-     generated_ids_trimmed = [
-         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-     ]
-     output_text = processor.batch_decode(
-         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-     )
-     return(output_text)
+     out = model.generate(**input)
+     return(out)

  iface = gr.Interface(launch,
      inputs=gr.Image(type='pil'),
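
Note on the new handler: `launch` receives a `PIL.Image` from `gr.Image(type='pil')` but passes it straight to `model.generate(**input)`; `**` unpacking of a PIL image raises a TypeError, and `generate` needs processor-prepared tensors in any case. Below is a minimal sketch of what the handler might look like instead, assuming a hard-coded "Describe this image." prompt, `max_new_tokens=128`, and a text output component, none of which appear in this commit.

import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText

model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
processor = AutoProcessor.from_pretrained(model_path)
model = AutoModelForImageTextToText.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    _attn_implementation="flash_attention_2",
).to("cuda")

def launch(image):
    # Wrap the uploaded image in a chat-style message; the prompt text here
    # is an assumption, not something the commit specifies.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    # Let the processor build model-ready tensors from the prompt and the PIL image.
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(
        model.device, dtype=torch.bfloat16
    )
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, as the removed Qwen version did.
    trimmed = generated_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]

iface = gr.Interface(launch,
    inputs=gr.Image(type='pil'),
    outputs='text')  # output component assumed; the diff excerpt is truncated here

The `_attn_implementation="flash_attention_2"` argument kept from the commit also assumes the flash-attn package is installed and a compatible GPU is available.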