Update clip_chat.py
clip_chat.py CHANGED (+11 -7)
```diff
@@ -7,15 +7,14 @@ from random import choice
 
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
-available_models = ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px']
-model, preprocess = clip.load(available_models[-1], device=device)
+model, preprocess = clip.load("ViT-L/14@336px", device=device)
 
 COCO = glob.glob(os.path.join(os.getcwd(), "images", "*"))
+available_models = ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px']
 
 
 def load_random_image():
-    image_path = choice(COCO)
+    image_path = COCO[0] # choice(COCO)
     image = Image.open(image_path)
     return image
 
@@ -26,6 +25,10 @@ def next_image():
     image = preprocess(Image.fromarray(image_org)).unsqueeze(0).to(device)
 
 
+# def calculate_logits(image, text):
+#     return model(image, text)[0]
+
+
 def calculate_logits(image_features, text_features):
     image_features = image_features / image_features.norm(dim=1, keepdim=True)
     text_features = text_features / text_features.norm(dim=1, keepdim=True)
@@ -37,7 +40,7 @@ def calculate_logits(image_features, text_features):
 last = -1
 best = -1
 
-goal =
+goal = 23
 
 image_org = load_random_image()
 image = preprocess(image_org).unsqueeze(0).to(device)
@@ -52,8 +55,9 @@ def answer(message):
 
     with torch.no_grad():
         text_features = model.encode_text(text)
-        logits_per_image, _ = model(image, text)
+        # logits_per_image, _ = model(image, text)
         logits = calculate_logits(image_features, text_features).cpu().numpy().flatten()[0]
+        # logits = calculate_logits(image, text)
 
     if last == -1:
         is_better = -1
@@ -78,6 +82,6 @@ def reset_everything():
     global last, best, goal, image, image_org
     last = -1
     best = -1
-    goal =
+    goal = 23
     image_org = load_random_image()
     image = preprocess(image_org).unsqueeze(0).to(device)
```