Spaces:

Didier
/

Vision_Language_SmolVLM2

Running on Zero

Didier committed about 1 month ago

Commit

f62e7b9

verified ·

1 Parent(s): c1f96a9

Create module_vision.py

Files changed (1) hide show

module_vision.py ADDED Viewed

+"""
+File: module_vision.py
+Description: A module for chat using video/image + text with a multimodal interface.
+Author: Didier Guillevic
+Date: 2025-04-02
+"""
+import gradio as gr
+import vlm
+def process(message, history):
+    """Generate the model response given message and history
+    """
+    messages = vlm.build_messages(message, history)
+    yield from vlm.stream_response(messages)
+examples=[
+    [{"text": "What is happening in the video?", "files": ["samples/Usain_Bolt_floats_to_victory.mp4"]}],
+]
+#
+# User interface
+#
+with gr.Blocks() as demo:
+    chat_interface = gr.ChatInterface(
+        fn=process,
+        description="Chat with text / text+image / text+video.",
+        examples=examples,
+        cache_examples=False,
+        textbox=gr.MultimodalTextbox(
+            label="Query Input",
+            file_types=["image", ".mp4"],
+            file_count="multiple"
+        ),
+        stop_btn="Stop Generation",
+        multimodal=True,
+        type="messages"
+    )