Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
 from threading import Thread
 from qwen_vl_utils import process_vision_info
 import torch
-import time
 
 # Specify the local cache path for models
 local_path = "Fancy-MLLM/R1-OneVision-7B"
@@ -12,11 +11,14 @@ local_path = "Fancy-MLLM/R1-OneVision-7B"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     local_path, torch_dtype="auto", device_map="cpu"
 )
-
 processor = AutoProcessor.from_pretrained(local_path)
 
 # Function to process image and text and generate the output
-def generate_output(image, text, button_click):
+def generate_output(image_path, text):
+    # Load image from file path
+    from PIL import Image
+    image = Image.open(image_path).convert("RGB")
+
     # Prepare input data
     messages = [
         {
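Note on this hunk: `generate_output` now receives a file path rather than a decoded image, because the `gr.Image` component further down is switched to `type="filepath"`. A minimal sketch of the same loading step in isolation (the file name is hypothetical):

```python
from PIL import Image

def load_rgb(image_path: str) -> Image.Image:
    # Gradio's type="filepath" passes the upload as a path on disk;
    # convert("RGB") normalizes RGBA/palette images to three channels
    # before they reach the processor.
    return Image.open(image_path).convert("RGB")

image = load_rgb("example.jpg")  # hypothetical upload path
```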
@@ -30,9 +32,8 @@ def generate_output(image, text, button_click):
 
     # Prepare inputs for the model
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # print(text_input)
-    # import pdb; pdb.set_trace()
     image_inputs, video_inputs = process_vision_info(messages)
+
     inputs = processor(
         text=[text_input],
         images=image_inputs,
@@ -40,7 +41,7 @@ def generate_output(image, text, button_click):
         padding=True,
         return_tensors="pt",
     )
-    inputs = inputs.to(
+    inputs = inputs.to(model.device)  # make sure devices match
 
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = dict(
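The added line pins the batch to wherever `from_pretrained` placed the weights (`"cpu"` here, a GPU if `device_map` ever changes). A tiny self-contained sketch of the rule it enforces, with `weights_device` standing in for `model.device`:

```python
import torch

# generate() needs the input tensors on the same device as the weights;
# this is what BatchFeature.to() does for each tensor in the batch.
weights_device = torch.device("cpu")  # stands in for model.device
batch = {"input_ids": torch.tensor([[1, 2, 3]])}
batch = {k: v.to(weights_device) for k, v in batch.items()}
```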
@@ -52,27 +53,25 @@ def generate_output(image, text, button_click):
         temperature=0.01,
         repetition_penalty=1.0,
     )
+
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-    generated_text = ''
 
+    generated_text = ''
     try:
         for new_text in streamer:
             generated_text += new_text
             yield f"{generated_text}"
-            # print(f"Current text: {generated_text}")  # debug output
-            # yield generated_text  # yield the raw text directly
     except Exception as e:
-        print(f"Error: {e}")
         yield f"Error occurred: {str(e)}"
 
+# CSS for UI styling
 Css = """
 #output-markdown {
     overflow-y: auto;
     white-space: pre-wrap;
     word-wrap: break-word;
 }
-
 #output-markdown .math {
     overflow-x: auto;
     max-width: 100%;
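This hunk mostly reorders lines around the streaming pattern, which is worth spelling out: `model.generate` blocks until generation finishes, so it runs on a worker thread while the main thread iterates the `TextIteratorStreamer` and yields partial text to Gradio. A runnable sketch of the mechanics with a tiny stand-in checkpoint (the 7B VL model is not needed to see the pattern):

```python
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Tiny stand-in model so the sketch runs quickly; app.py uses the 7B VL model.
name = "sshleifer/tiny-gpt2"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

inputs = tokenizer("Hello", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a worker thread; the streamer then hands
# decoded text chunks to the main thread as they are produced.
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=20))
thread.start()

generated_text = ""
for new_text in streamer:
    generated_text += new_text
    print(generated_text)
thread.join()
```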
@@ -86,66 +85,21 @@ Css = """
 #qwen-md .katex-display>.katex>.katex-html { display: inline; }
 """
 
+# Gradio UI
 with gr.Blocks(css=Css) as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="
-            input_text = gr.Textbox(label="
+            input_image = gr.Image(type="filepath", label="Upload")  # key change: filepath avoids redrawing the UI
+            input_text = gr.Textbox(label="Input your question")
+
             with gr.Row():
-
-
-                with gr.Column():
-                    submit_btn = gr.Button("Submit", variant="primary")
+                clear_btn = gr.ClearButton([input_image, input_text])
+                submit_btn = gr.Button("Submit", variant="primary")
 
-            # gr.Examples(
-            #     examples=[
-            #         ["20250208-205626.jpeg", "How many plums (see the picture) weigh as much as an apple?"],
-            #         ["38.jpg", "Each of the digits 2, 3, 4 and 5 will be placed in a square. Then there will be two numbers, which will be added together. What is the biggest number that they could make?"],
-            #         ["64.jpg", "Four of the numbers 1,3,4,5 and 7 are written into the boxes so that the calculation is correct.\nWhich number was not used?"],
-            #     ],
-            #     inputs=[input_image[0], input_text],
-            #     label="Example Inputs"
-            # )
         with gr.Column():
-            output_text = gr.Markdown(
-                label="Generated Response",
-                max_height="80vh",
-                min_height="50vh",
-                container=True,
-                latex_delimiters=[{
-                    "left": "\\(",
-                    "right": "\\)",
-                    "display": True
-                }, {
-                    "left": "\\begin\{equation\}",
-                    "right": "\\end\{equation\}",
-                    "display": True
-                }, {
-                    "left": "\\begin\{align\}",
-                    "right": "\\end\{align\}",
-                    "display": True
-                }, {
-                    "left": "\\begin\{alignat\}",
-                    "right": "\\end\{alignat\}",
-                    "display": True
-                }, {
-                    "left": "\\begin\{gather\}",
-                    "right": "\\end\{gather\}",
-                    "display": True
-                }, {
-                    "left": "\\begin\{CD\}",
-                    "right": "\\end\{CD\}",
-                    "display": True
-                }, {
-                    "left": "\\[",
-                    "right": "\\]",
-                    "display": True
-                }],
-                elem_id="qwen-md")
-
-
+            output_text = gr.Markdown(elem_id="qwen-md", container=True)
 
     submit_btn.click(
         fn=generate_output,
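This hunk is the bulk of the change: the image input moves to `type="filepath"` (per the inline comment, so uploads no longer trigger a client-side redraw), a `gr.ClearButton` replaces the old nested column, and the heavily configured `gr.Markdown` collapses to its defaults. A standalone sketch of the new layout with a placeholder callback (the `echo` function and the `inputs=` wiring are assumptions, since the click wiring's `inputs=` line sits outside the diff):

```python
import gradio as gr

def echo(image_path, text):
    # Placeholder for generate_output: report what the callback received.
    yield f"Got {image_path!r} and question: {text}"

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="filepath", label="Upload")
            input_text = gr.Textbox(label="Input your question")
            with gr.Row():
                clear_btn = gr.ClearButton([input_image, input_text])
                submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_text = gr.Markdown(elem_id="qwen-md", container=True)

    submit_btn.click(fn=echo, inputs=[input_image, input_text],
                     outputs=output_text, queue=True)

demo.launch()
```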
@@ -153,5 +107,5 @@ with gr.Blocks(css=Css) as demo:
         outputs=output_text,
         queue=True
     )
+
     demo.launch(share=True)
-
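One side effect worth noting: dropping the `latex_delimiters` list means `gr.Markdown` falls back to Gradio's default delimiters (display math between `$$ ... $$` in current releases), so the `\(...\)` and `\begin{...}` forms from the old list will no longer render as math. If that matters, the delimiters can be passed back explicitly, for example:

```python
import gradio as gr

# Restores inline \( ... \) rendering alongside Gradio's default $$ ... $$.
output_text = gr.Markdown(
    elem_id="qwen-md",
    container=True,
    latex_delimiters=[
        {"left": "$$", "right": "$$", "display": True},
        {"left": "\\(", "right": "\\)", "display": False},  # inline math
    ],
)
```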