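"""Gradio demo that computes a CLIP image-text similarity score on
Hugging Face ZeroGPU hardware via the `spaces.GPU` decorator."""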
import spaces  # imported before torch, as recommended for ZeroGPU Spaces
import torch
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Model checkpoint identifier; the weights themselves are loaded inside
# the GPU-decorated function below
model_name = "openai/clip-vit-base-patch32"

@spaces.GPU
def clip_similarity(image, text):
    # Load the model and processor inside the GPU context; ZeroGPU attaches
    # a device only for the duration of the decorated call
    model = CLIPModel.from_pretrained(model_name)
    processor = CLIPProcessor.from_pretrained(model_name)

    device = torch.device("cuda")
    model.to(device)

    inputs = processor(text=[text], images=image, return_tensors="pt", padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only, so skip gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image has shape (1, 1) for a single image-text pair
    return outputs.logits_per_image.item()
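
# Note: an alternative ZeroGPU pattern is to call from_pretrained once at
# module level (the weights stay on the CPU) and only move the model to the
# GPU inside the decorated function, avoiding a fresh load on every request.
# A minimal sketch of that variant, assuming the same model_name as above:
#
#   model = CLIPModel.from_pretrained(model_name)        # loaded once, on CPU
#   processor = CLIPProcessor.from_pretrained(model_name)
#
#   @spaces.GPU
#   def clip_similarity(image, text):
#       model.to("cuda")
#       ...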

# Set up the Gradio interface
iface = gr.Interface(
    fn=clip_similarity,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Text(label="Input Text")
    ],
    outputs=gr.Number(label="Similarity Score"),
    title="CLIP Similarity Demo with ZeroGPU"
)

if __name__ == "__main__":
    iface.launch()