Commit 2d8aec0
1 Parent(s): df34abb
chore: reformat code

app.py CHANGED
@@ -6,16 +6,15 @@ import torch
 import PIL.Image as Image
 
 # Set device to GPU if available
-device = torch.device(
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Load the OpenCLIP model and the necessary preprocessors
 # openclip_model = 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
 # openclip_model = 'laion/CLIP-ViT-B-16-laion2B-s34B-b88K'
-openclip_model_name =
+openclip_model_name = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
 openclip_model = "hf-hub:" + openclip_model_name
 model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
-    model_name=openclip_model,
-    device=device
+    model_name=openclip_model, device=device
 )
 
 
@@ -66,7 +65,10 @@ def generate_text_embedding(text_data: Union[str, tuple[str]]) -> list[str]:
     text_embeddings = model.encode_text(text_data)
 
     # Convert embeddings to list of strings
-    text_embeddings = [
+    text_embeddings = [
+        embedding.detach().cpu().numpy().tolist()
+        for embedding in text_embeddings
+    ]
 
     # Insert empty strings at indices of empty text strings
     for i in empty_data_indices:
@@ -74,8 +76,11 @@ def generate_text_embedding(text_data: Union[str, tuple[str]]) -> list[str]:
 
     return text_embeddings
 
+
 # Define function to generate image embeddings
-def generate_image_embedding(
+def generate_image_embedding(
+    image_data: Union[Image.Image, tuple[Image.Image]]
+) -> list[str]:
     """
     Generate embeddings for image data using the OpenCLIP model.
 
@@ -118,7 +123,10 @@ def generate_image_embedding(image_data: Union[Image.Image, tuple[Image.Image]])
     image_embeddings = model.encode_image(image_data)
 
     # Convert embeddings to list of strings
-    image_embeddings = [
+    image_embeddings = [
+        embedding.detach().cpu().numpy().tolist()
+        for embedding in image_embeddings
+    ]
 
     # Insert empty strings at indices of empty images
     for i in empty_data_indices:
@@ -128,7 +136,10 @@ def generate_image_embedding(image_data: Union[Image.Image, tuple[Image.Image]])
 
 
 # Define function to generate embeddings
-def generate_embedding(
+def generate_embedding(
+    text_data: Union[str, tuple[str]],
+    image_data: Union[Image.Image, tuple[Image.Image]],
+) -> tuple[list[str], list[str], list[str]]:
     """
     Generate embeddings for text and image data using the OpenCLIP model.
 
@@ -162,7 +173,9 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
     # Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
     text_embeddings_filtered = []
     image_embeddings_filtered = []
-    for i, (text_embedding, image_embedding) in enumerate(
+    for i, (text_embedding, image_embedding) in enumerate(
+        zip(text_embeddings, image_embeddings)
+    ):
         if text_embedding != "" and image_embedding != "":
             text_embeddings_filtered.append(text_embedding)
             image_embeddings_filtered.append(image_embedding)
@@ -176,11 +189,18 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
     image_embeddings_tensor = torch.tensor(image_embeddings_filtered)
 
     # Normalize the embeddings
-    text_embedding_norm = text_embeddings_tensor / text_embeddings_tensor.norm(
-    image_embedding_norm = image_embeddings_tensor / image_embeddings_tensor.norm(
+    text_embedding_norm = text_embeddings_tensor / text_embeddings_tensor.norm(
+        dim=-1, keepdim=True
+    )
+    image_embedding_norm = (
+        image_embeddings_tensor
+        / image_embeddings_tensor.norm(dim=-1, keepdim=True)
+    )
 
     # Calculate cosine similarity
-    similarity = torch.nn.functional.cosine_similarity(
+    similarity = torch.nn.functional.cosine_similarity(
+        text_embedding_norm, image_embedding_norm, dim=-1
+    )
     # Convert to percentage as text
     similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
 
@@ -195,7 +215,12 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
 demo = gr.Interface(
     fn=generate_embedding,
     inputs=[
-        gr.Textbox(
+        gr.Textbox(
+            lines=5,
+            max_lines=5,
+            placeholder="Enter Text Here...",
+            label="Text to Embed",
+        ),
         gr.Image(height=512, type="pil", label="Image to Embed"),
     ],
     outputs=[
@@ -208,7 +233,7 @@ demo = gr.Interface(
     description="Generate embeddings using OpenCLIP model for text and images.",
     allow_flagging="never",
     batch=False,
-    api_name="embed"
+    api_name="embed",
 )
 
 # Enable queueing and launch the app
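For reference, a minimal standalone sketch (not part of this commit) of the flow app.py implements after the reformat: loading the checkpoint through open_clip's "hf-hub:" prefix, encoding one text and one image, normalizing, and taking the cosine similarity. The image path is a placeholder, and the environment is assumed to have open_clip_torch, torch, and Pillow installed.

# Minimal sketch, not part of the commit: mirrors the model loading and
# similarity computation in app.py.
import open_clip
import PIL.Image as Image
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Same hf-hub checkpoint as app.py
model_name = "hf-hub:laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
    model_name, device=device
)
tokenizer = open_clip.get_tokenizer(model_name)

# "cat.jpg" is a placeholder path, not a file from the repository
image = preprocess_val(Image.open("cat.jpg")).unsqueeze(0).to(device)
text = tokenizer(["a photo of a cat"]).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

# Normalize and take the cosine similarity, as generate_embedding does
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
similarity = torch.nn.functional.cosine_similarity(text_features, image_features, dim=-1)
print(f"{similarity.item() * 100:.2f}%")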
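Because the interface keeps api_name="embed", the Space's endpoint can presumably also be called programmatically. A minimal sketch with a recent gradio_client, where the Space id and the image path are placeholders and the three return values are assumed to follow the interface's outputs (text embedding, image embedding, similarity):

# Minimal sketch, not part of the commit: calling the named "embed" endpoint.
# "user/openclip-embedding-space" and "cat.jpg" are placeholder values.
from gradio_client import Client, handle_file

client = Client("user/openclip-embedding-space")
text_embedding, image_embedding, similarity = client.predict(
    "a photo of a cat",      # Textbox: text to embed
    handle_file("cat.jpg"),  # Image: image to embed
    api_name="/embed",
)
print(similarity)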