Spaces:

NikhilJoson
/

Multimodal_Chat_JanusPro

Running on Zero

App Files Files Community

NikhilJoson committed on Feb 24

Commit

e200100

verified ·

1 Parent(s): e0402e9

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -59

app.py CHANGED Viewed

@@ -1,68 +1,45 @@
-import gradio as gr
 import torch
-from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor, AutoModelForVision2Seq, pipeline
-from janus.models import MultiModalityCausalLM, VLChatProcessor
-from janus.utils.io import load_pil_images
-from PIL import Image
-import numpy as np
-import os
-import time
-from Upsample import RealESRGAN
-import spaces  # Import spaces for ZeroGPU compatibility
-import re
-# Load Janus Pro models for vision and text tasks
-# The same checkpoint id is passed to all four loaders below (vision model,
-# text model, processor and image pipeline).
-vision_model = AutoModelForVision2Seq.from_pretrained("deepseek-ai/janus-pro-7b", torch_dtype=torch.float16, device_map="auto")
-text_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/janus-pro-7b", torch_dtype=torch.float16, device_map="auto")
-processor = AutoProcessor.from_pretrained("deepseek-ai/janus-pro-7b")
-# NOTE(review): confirm that "text-to-image" is a task the transformers
-# pipeline() factory supports for this checkpoint.
-image_pipe = pipeline("text-to-image", model="deepseek-ai/janus-pro-7b")
-# Most recent uploaded image. Module-level global: chatbot() writes it and
-# reset_chat() clears it.
-last_uploaded_image = None
-def detect_image_request(user_input):
-    """Return True when user_input contains any image-generation phrase.
-
-    Each entry of the keyword list is used as a regular-expression pattern
-    and searched for case-insensitively anywhere in user_input.
-    """
-    # Patterns are not anchored to word boundaries, so a short keyword such as
-    # "draw" also matches inside longer words (e.g. "withdraw").
-    image_keywords = ["generate an image", "create an image", "show me a picture", "draw", "visualize","generate image",
-                      "image generation", "get me an image", "get an image", "need an image", "need image",]
-    return any(re.search(keyword, user_input, re.IGNORECASE) for keyword in image_keywords)
-def chatbot(history, image=None, user_input=""):
-    """Handle one chat turn and return (history, "", generated image or None).
-
-    history is the list of (user message, reply) pairs and is appended to in
-    place. The empty string in the result is wired to the textbox output, and
-    the third element is only non-None on the image-generation path.
-    """
-    global last_uploaded_image
-    if image:
-        last_uploaded_image = image  # Store the latest uploaded image
-    # Image-generation requests take priority over everything else and return early.
-    if detect_image_request(user_input):
-        image = image_pipe(user_input)
-        history.append((user_input, "[Generated Image]"))
-        return history, "", image[0]["image"]
-    if last_uploaded_image:
-        # Only the stored image is passed to the processor here; the text of
-        # user_input is not part of the model input on this branch.
-        inputs = processor(images=last_uploaded_image, return_tensors="pt").to("cuda")
-        output = vision_model.generate(**inputs)
-        response = processor.decode(output[0], skip_special_tokens=True)
     else:
-        # NOTE(review): generate() is called with the raw user_input string, not
-        # with tokenized inputs — confirm this is accepted by the model.
-        response = text_model.generate(user_input)
-        response = processor.decode(response[0], skip_special_tokens=True)
     history.append((user_input, response))
-    return history, "", None
-def reset_chat():
-    """Forget the stored image and return an empty history and empty text.
-
-    The returned pair ([], "") is wired to the chat display and the textbox.
-    """
-    global last_uploaded_image
-    last_uploaded_image = None
-    return [], ""
 with gr.Blocks() as demo:
-    gr.Markdown("# Janus Pro Chatbot with Vision & Image Generation")
-    # Conversation display; its value is also passed to chatbot() as the history.
-    chatbot_interface = gr.Chatbot()
-    with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload Image")
-        text_input = gr.Textbox(label="Type your message")
-    send_button = gr.Button("Send")
-    reset_button = gr.Button("Reset Chat")
-    image_output = gr.Image(label="Generated Image")
-    # Send: chatbot(history, image, text) -> (history, textbox value, generated image or None).
-    send_button.click(chatbot, [chatbot_interface, image_input, text_input], [chatbot_interface, text_input, image_output])
-    # Reset: clears the chat display and the textbox; the image components are
-    # not among the outputs, so they keep whatever they currently show.
-    reset_button.click(reset_chat, [], [chatbot_interface, text_input])
 demo.launch()

 import torch
+import argparse
+import gradio as gr
+from janus import JanusProcessor, JanusForConditionalGeneration
+from transformers import AutoTokenizer
+# Load Model and Processor
+# NOTE(review): the code this commit replaces loaded "deepseek-ai/janus-pro-7b";
+# confirm that the "allenai/janus-pro-7b" repository exists and that the
+# installed `janus` package exports JanusProcessor and
+# JanusForConditionalGeneration.
+model_id = "allenai/janus-pro-7b"
+# Use the GPU when torch can see one, otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+processor = JanusProcessor.from_pretrained(model_id)
+# bfloat16 weights when CUDA is available, float32 otherwise.
+model = JanusForConditionalGeneration.from_pretrained(
+    model_id, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
+).to(device)
+def chat_with_model(history, user_input, image=None):
+    """Run one chat turn through the model and record it in history.
+
+    Args:
+        history: List of (user message, model response) pairs. It is appended
+            to in place; None is treated as an empty history.
+        user_input: Text typed by the user.
+        image: Optional image to send along with the text, or None.
+
+    Returns:
+        A (history, "") pair: the updated history for the chat display and an
+        empty string that clears the input textbox.
+    """
+    history = [] if history is None else history
+    # Nothing to send: skip the model call instead of generating from an empty
+    # prompt and appending an empty turn.
+    if not (user_input or "").strip() and image is None:
+        return history, ""
+    if image is not None:
+        inputs = processor(text=user_input, images=image, return_tensors="pt").to(device)
     else:
+        inputs = processor(text=user_input, return_tensors="pt").to(device)
+    generated_ids = model.generate(**inputs, max_new_tokens=100)
+    # NOTE(review): the full generated sequence is decoded; confirm whether it
+    # includes the prompt tokens and needs to be trimmed before display.
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     history.append((user_input, response))
+    return history, ""
 with gr.Blocks() as demo:
+    gr.Markdown("# Chat with Janus Pro 7B (Multimodal AI)")
+    # Conversation history handed to chat_with_model. It is not listed in the
+    # click outputs below, so it is only updated by the handler appending to
+    # the list in place.
+    # NOTE(review): confirm in-place mutation of gr.State persists across turns
+    # on the deployed Gradio version; otherwise return it as an output.
+    chat_history = gr.State([])
+    chatbot = gr.Chatbot()
+    user_input = gr.Textbox(label="Your message")
+    # No `optional=True` here: it is not a gr.Image constructor parameter. The
+    # image is optional because chat_with_model accepts image=None.
+    image_input = gr.Image(label="Upload an image (optional)", type="pil")
+    send_btn = gr.Button("Send")
+    # chat_with_model returns (history, ""): the history refreshes the chat
+    # display and the empty string clears the textbox.
+    send_btn.click(chat_with_model, inputs=[chat_history, user_input, image_input], outputs=[chatbot, user_input])
+    # Example prompts (currently disabled):
+    # gr.Examples([
+    #     ["Describe this image", "example_image.jpg"],
+    #     ["Generate an image of a futuristic city"],
+    # ], inputs=[user_input, image_input])
 demo.launch()