Fancy-MLLM committed on
Commit
e3288b1
·
verified ·
1 Parent(s): 84b2b3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -64
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, Text
3
  from threading import Thread
4
  from qwen_vl_utils import process_vision_info
5
  import torch
6
- import time
7
 
8
  # Specify the local cache path for models
9
  local_path = "Fancy-MLLM/R1-OneVision-7B"
@@ -12,11 +11,14 @@ local_path = "Fancy-MLLM/R1-OneVision-7B"
12
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
13
  local_path, torch_dtype="auto", device_map="cpu"
14
  )
15
-
16
  processor = AutoProcessor.from_pretrained(local_path)
17
 
18
  # Function to process image and text and generate the output
19
- def generate_output(image, text, button_click):
 
 
 
 
20
  # Prepare input data
21
  messages = [
22
  {
@@ -30,9 +32,8 @@ def generate_output(image, text, button_click):
30
 
31
  # Prepare inputs for the model
32
  text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
33
- # print(text_input)
34
- # import pdb; pdb.set_trace()
35
  image_inputs, video_inputs = process_vision_info(messages)
 
36
  inputs = processor(
37
  text=[text_input],
38
  images=image_inputs,
@@ -40,7 +41,7 @@ def generate_output(image, text, button_click):
40
  padding=True,
41
  return_tensors="pt",
42
  )
43
- inputs = inputs.to("cuda")
44
 
45
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
46
  generation_kwargs = dict(
@@ -52,27 +53,25 @@ def generate_output(image, text, button_click):
52
  temperature=0.01,
53
  repetition_penalty=1.0,
54
  )
 
55
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
56
  thread.start()
57
- generated_text = ''
58
 
 
59
  try:
60
  for new_text in streamer:
61
  generated_text += new_text
62
  yield f"‎{generated_text}"
63
- # print(f"Current text: {generated_text}") # 调试输出
64
- # yield generated_text # 直接输出原始文本
65
  except Exception as e:
66
- print(f"Error: {e}")
67
  yield f"Error occurred: {str(e)}"
68
 
 
69
  Css = """
70
  #output-markdown {
71
  overflow-y: auto;
72
  white-space: pre-wrap;
73
  word-wrap: break-word;
74
  }
75
-
76
  #output-markdown .math {
77
  overflow-x: auto;
78
  max-width: 100%;
@@ -86,66 +85,21 @@ Css = """
86
  #qwen-md .katex-display>.katex>.katex-html { display: inline; }
87
  """
88
 
 
89
  with gr.Blocks(css=Css) as demo:
90
  gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
91
 
92
  with gr.Row():
93
  with gr.Column():
94
- input_image = gr.Image(type="pil", label="Upload")
95
- input_text = gr.Textbox(label="input your question")
 
96
  with gr.Row():
97
- with gr.Column():
98
- clear_btn = gr.ClearButton([input_image, input_text])
99
- with gr.Column():
100
- submit_btn = gr.Button("Submit", variant="primary")
101
 
102
- # gr.Examples(
103
- # examples=[
104
- # ["20250208-205626.jpeg", "How many plums (see the picture) weigh as much as an apple?"],
105
- # ["38.jpg", "Each of the digits 2, 3, 4 and 5 will be placed in a square. Then there will be two numbers, which will be added together. What is the biggest number that they could make?"],
106
- # ["64.jpg", "Four of the numbers 1,3,4,5 and 7 are written into the boxes so that the calculation is correct.\nWhich number was not used?"],
107
- # ],
108
- # inputs=[input_image[0], input_text],
109
- # label="Example Inputs"
110
- # )
111
  with gr.Column():
112
- output_text = gr.Markdown(
113
- label="Generated Response",
114
- max_height="80vh",
115
- min_height="50vh",
116
- container=True,
117
- latex_delimiters=[{
118
- "left": "\\(",
119
- "right": "\\)",
120
- "display": True
121
- }, {
122
- "left": "\\begin\{equation\}",
123
- "right": "\\end\{equation\}",
124
- "display": True
125
- }, {
126
- "left": "\\begin\{align\}",
127
- "right": "\\end\{align\}",
128
- "display": True
129
- }, {
130
- "left": "\\begin\{alignat\}",
131
- "right": "\\end\{alignat\}",
132
- "display": True
133
- }, {
134
- "left": "\\begin\{gather\}",
135
- "right": "\\end\{gather\}",
136
- "display": True
137
- }, {
138
- "left": "\\begin\{CD\}",
139
- "right": "\\end\{CD\}",
140
- "display": True
141
- }, {
142
- "left": "\\[",
143
- "right": "\\]",
144
- "display": True
145
- }],
146
- elem_id="qwen-md")
147
-
148
-
149
 
150
  submit_btn.click(
151
  fn=generate_output,
@@ -153,5 +107,5 @@ with gr.Blocks(css=Css) as demo:
153
  outputs=output_text,
154
  queue=True
155
  )
 
156
  demo.launch(share=True)
157
-
 
3
  from threading import Thread
4
  from qwen_vl_utils import process_vision_info
5
  import torch
 
6
 
7
  # Specify the local cache path for models
8
  local_path = "Fancy-MLLM/R1-OneVision-7B"
 
11
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
12
  local_path, torch_dtype="auto", device_map="cpu"
13
  )
 
14
  processor = AutoProcessor.from_pretrained(local_path)
15
 
16
  # Function to process image and text and generate the output
17
+ def generate_output(image_path, text):
18
+ # Load image from file path
19
+ from PIL import Image
20
+ image = Image.open(image_path).convert("RGB")
21
+
22
  # Prepare input data
23
  messages = [
24
  {
 
32
 
33
  # Prepare inputs for the model
34
  text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
35
  image_inputs, video_inputs = process_vision_info(messages)
36
+
37
  inputs = processor(
38
  text=[text_input],
39
  images=image_inputs,
 
41
  padding=True,
42
  return_tensors="pt",
43
  )
44
+ inputs = inputs.to(model.device) # Ensure the inputs are on the same device as the model
45
 
46
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
47
  generation_kwargs = dict(
 
53
  temperature=0.01,
54
  repetition_penalty=1.0,
55
  )
56
+
57
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
58
  thread.start()
 
59
 
60
+ generated_text = ''
61
  try:
62
  for new_text in streamer:
63
  generated_text += new_text
64
  yield f"‎{generated_text}"
 
 
65
  except Exception as e:
 
66
  yield f"Error occurred: {str(e)}"
67
 
68
+ # CSS for UI styling
69
  Css = """
70
  #output-markdown {
71
  overflow-y: auto;
72
  white-space: pre-wrap;
73
  word-wrap: break-word;
74
  }
 
75
  #output-markdown .math {
76
  overflow-x: auto;
77
  max-width: 100%;
 
85
  #qwen-md .katex-display>.katex>.katex-html { display: inline; }
86
  """
87
 
88
+ # Gradio UI
89
  with gr.Blocks(css=Css) as demo:
90
  gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
91
 
92
  with gr.Row():
93
  with gr.Column():
94
+ input_image = gr.Image(type="filepath", label="Upload") # Key change: type="filepath" avoids UI re-rendering
95
+ input_text = gr.Textbox(label="Input your question")
96
+
97
  with gr.Row():
98
+ clear_btn = gr.ClearButton([input_image, input_text])
99
+ submit_btn = gr.Button("Submit", variant="primary")
 
 
100
 
 
 
 
 
 
 
 
 
 
101
  with gr.Column():
102
+ output_text = gr.Markdown(elem_id="qwen-md", container=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  submit_btn.click(
105
  fn=generate_output,
 
107
  outputs=output_text,
108
  queue=True
109
  )
110
+
111
  demo.launch(share=True)