mjavaid committed on
Commit
199e7c3
·
1 Parent(s): b1ec465

first commit

Browse files
Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -1,12 +1,12 @@
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
  import os
5
- import spaces
6
 
7
  hf_token = os.environ["HF_TOKEN"]
8
 
9
- # Load the Gemma 3 pipeline.
10
  pipe = pipeline(
11
  "image-text-to-text",
12
  model="google/gemma-3-4b-it",
@@ -14,45 +14,50 @@ pipe = pipeline(
14
  torch_dtype=torch.bfloat16,
15
  use_auth_token=hf_token
16
  )
 
17
  @spaces.GPU
18
  def generate_response(user_text, user_image):
19
- # Check if an image was uploaded.
20
  if user_image is None:
21
- return "Error: An image upload is mandatory."
22
 
23
- # Prepare messages with the system prompt and user inputs.
24
  messages = [
25
  {
26
  "role": "system",
27
  "content": [{"type": "text", "text": "You are a helpful assistant."}]
28
  }
29
  ]
 
30
  user_content = [{"type": "image", "image": user_image}]
31
  if user_text:
32
  user_content.append({"type": "text", "text": user_text})
 
33
  messages.append({"role": "user", "content": user_content})
34
 
35
- # Call the pipeline.
36
  output = pipe(text=messages, max_new_tokens=200)
37
 
38
- # Try to extract the generated content.
39
  try:
40
  response = output[0]["generated_text"][-1]["content"]
 
41
  except (KeyError, IndexError, TypeError):
42
- response = str(output)
43
-
44
- return response
45
 
46
- iface = gr.Interface(
47
- fn=generate_response,
48
- inputs=[
49
- gr.Textbox(label="Message", placeholder="Type your message here..."),
50
- gr.Image(type="pil", label="Upload an Image", source="upload")
51
- ],
52
- outputs=gr.Textbox(label="Response"),
53
- title="Gemma 3 Simple Interface",
54
- description="Enter your message and upload an image (image upload is mandatory) to get a response."
55
- )
 
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
- iface.launch()
 
1
+ import spaces
2
  import gradio as gr
3
  from transformers import pipeline
4
  import torch
5
  import os
 
6
 
7
  hf_token = os.environ["HF_TOKEN"]
8
 
9
+ # Load the Gemma 3 pipeline
10
  pipe = pipeline(
11
  "image-text-to-text",
12
  model="google/gemma-3-4b-it",
 
14
  torch_dtype=torch.bfloat16,
15
  use_auth_token=hf_token
16
  )
17
+
18
@spaces.GPU
def generate_response(user_text, user_image):
    """Run the Gemma 3 image-text-to-text pipeline on the user's inputs.

    Args:
        user_text: Optional text prompt; appended to the message only when
            non-empty.
        user_image: PIL image (required). If ``None``, an error string is
            returned instead of calling the model.

    Returns:
        The model's generated reply as a string, or a human-readable error
        message when the image is missing or the output cannot be parsed.
    """
    # An image is mandatory for this pipeline; fail fast with a UI-friendly
    # message rather than letting the pipeline raise.
    if user_image is None:
        return "Please upload an image (required)"

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}]
        }
    ]

    # Image first; the text part is optional and only added when provided.
    user_content = [{"type": "image", "image": user_image}]
    if user_text:
        user_content.append({"type": "text", "text": user_text})

    messages.append({"role": "user", "content": user_content})

    # Call the pipeline with the provided messages
    output = pipe(text=messages, max_new_tokens=200)

    # Keep the try body minimal: only the nested extraction below can
    # legitimately raise these errors, so a success return stays outside it.
    try:
        response = output[0]["generated_text"][-1]["content"]
    except (KeyError, IndexError, TypeError):
        return "Error processing the response. Please try again."
    return response
 
 
44
 
45
# Build the Gradio UI: an image (required) plus an optional text prompt go
# in; the model's text response comes out.
with gr.Blocks() as demo:
    gr.Markdown("# Gemma 3 Image Analysis")
    gr.Markdown("Upload an image and optionally add a prompt to get the model's response.")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload an image (required)")
        prompt_box = gr.Textbox(
            label="Your prompt (optional)",
            placeholder="Describe what you see in this image",
        )

    response_box = gr.Textbox(label="Model Response")

    # Wire the button to the inference function; inputs are passed text
    # first, then image, matching generate_response's parameter order.
    submit = gr.Button("Submit")
    submit.click(
        generate_response,
        inputs=[prompt_box, image_input],
        outputs=response_box,
    )

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()