NikhilJoson committed on
Commit
70a9186
·
verified ·
1 Parent(s): b56f0d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -30,6 +30,8 @@ cuda_device = 'cuda' if torch.cuda.is_available() else 'cpu'
30
  sr_model = RealESRGAN(torch.device('cuda' if torch.cuda.is_available() else 'cpu'), scale=2)
31
  sr_model.load_weights(f'weights/RealESRGAN_x2.pth', download=False)
32
 
 
 
33
  @torch.inference_mode()
34
  @spaces.GPU(duration=120)
35
  def multimodal_understanding(image, question, seed, top_p, temperature, progress=gr.Progress(track_tqdm=True)):
@@ -76,12 +78,13 @@ with gr.Blocks(css=css) as demo:
76
  image_input = gr.Image(label="Upload an image (optional)")
77
 
78
  def respond(message, image):
79
- # Here you can add logic to handle the image if provided
80
  if image is not None:
81
- # Call multimodal understanding with the image and message
82
  response = multimodal_understanding(image, message, seed=42, top_p=0.95, temperature=0.1)
 
 
83
  else:
84
- # If no image is provided, just respond with a text-based answer
85
  response = "Please provide an image for multimodal understanding."
86
 
87
  return response
 
30
  sr_model = RealESRGAN(torch.device('cuda' if torch.cuda.is_available() else 'cpu'), scale=2)
31
  sr_model.load_weights(f'weights/RealESRGAN_x2.pth', download=False)
32
 
33
+ last_uploaded_image = None
34
+
35
  @torch.inference_mode()
36
  @spaces.GPU(duration=120)
37
  def multimodal_understanding(image, question, seed, top_p, temperature, progress=gr.Progress(track_tqdm=True)):
 
78
  image_input = gr.Image(label="Upload an image (optional)")
79
 
80
  def respond(message, image):
81
+ global last_uploaded_image
82
  if image is not None:
83
+ last_uploaded_image = image # Update the last uploaded image
84
  response = multimodal_understanding(image, message, seed=42, top_p=0.95, temperature=0.1)
85
+ elif last_uploaded_image is not None:
86
+ response = multimodal_understanding(last_uploaded_image, message, seed=42, top_p=0.95, temperature=0.1)
87
  else:
 
88
  response = "Please provide an image for multimodal understanding."
89
 
90
  return response