Create app.py
app.py
CHANGED
@@ -61,17 +61,7 @@ class UNetGenerator(nn.Module):
         u4 = self.up4(torch.cat([u3, d1], dim=1))
         return u4

-# ----------------- Image
-# img_transform = transforms.Compose([
-#     transforms.Resize((256, 192)),
-#     transforms.ToTensor(),
-#     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
-# ])
-#new changes
-
-#end new changes
-
-# ----------------- Helper Functions -----------------
+# ----------------- Image Segmentation -----------------
 def get_segmentation(image: Image.Image):
     inputs = processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
@@ -80,6 +70,7 @@ def get_segmentation(image: Image.Image):
     predicted = torch.argmax(logits, dim=1)[0].cpu().numpy()
     return predicted

+# ----------------- Agnostic Creation -----------------
 def generate_agnostic(image: Image.Image, segmentation):
     img_np = np.array(image.resize((192, 256)))
     agnostic_np = img_np.copy()
@@ -89,6 +80,7 @@ def generate_agnostic(image: Image.Image, segmentation):
         agnostic_np[segmentation_resized == label] = [128, 128, 128]
     return Image.fromarray(agnostic_np)

+# ----------------- Load Model -----------------
 def load_model(model_type):
     if model_type == "UNet":
         model = UNetGenerator().to(device)
@@ -112,69 +104,7 @@ def load_model(model_type):
     model.eval()
     return model

-#
-# agnostic_tensor = img_transform(agnostic_img).unsqueeze(0).to(device)
-# cloth_tensor = img_transform(cloth_img).unsqueeze(0).to(device)
-# input_tensor = torch.cat([agnostic_tensor, cloth_tensor], dim=1)
-
-# with torch.no_grad():
-#     output = model(input_tensor)
-
-# output_img = output[0].cpu().permute(1, 2, 0).numpy()
-# output_img = (output_img + 1) / 2
-# output_img = np.clip(output_img, 0, 1)
-
-# person_np = np.array(person_img.resize((192, 256))).astype(np.float32) / 255.0
-# segmentation_resized = cv2.resize(segmentation.astype(np.uint8), (192, 256), interpolation=cv2.INTER_NEAREST)
-# blend_mask = (segmentation_resized == 0).astype(np.float32)
-# blend_mask = np.expand_dims(blend_mask, axis=2)
-
-# final_output = blend_mask * person_np + (1 - blend_mask) * output_img
-# final_output = (final_output * 255).astype(np.uint8)
-
-# return Image.fromarray(final_output)
-#new changes
-# def generate_tryon_output(person_img, agnostic_img, cloth_img, segmentation, model, model_type):
-
-#     if model_type == "UNet":
-#         img_transform = transforms.Compose([
-#             transforms.Resize((256, 192)),
-#             transforms.ToTensor()
-#         ])
-
-#     else:
-
-#         img_transform = transforms.Compose([
-#             transforms.Resize((256, 192)),
-#             transforms.ToTensor(),
-#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
-#         ])
-
-#     agnostic_tensor = img_transform(agnostic_img).unsqueeze(0).to(device)
-#     cloth_tensor = img_transform(cloth_img).unsqueeze(0).to(device)
-#     input_tensor = torch.cat([agnostic_tensor, cloth_tensor], dim=1)
-
-#     with torch.no_grad():
-#         output = model(input_tensor)
-
-#     if model_type == "UNet":
-#         output_img = output.squeeze(0).cpu().permute(1, 2, 0).numpy()
-#         output_img = (output_img * 255).astype(np.uint8)
-#         return Image.fromarray(output_img)
-#     else:
-#         output_img = output[0].cpu().permute(1, 2, 0).numpy()
-#         output_img = (output_img + 1) / 2
-#         output_img = np.clip(output_img, 0, 1)
-
-#         person_np = np.array(person_img.resize((192, 256))).astype(np.float32) / 255.0
-#         segmentation_resized = cv2.resize(segmentation.astype(np.uint8), (192, 256), interpolation=cv2.INTER_NEAREST)
-#         blend_mask = (segmentation_resized == 0).astype(np.float32)
-#         blend_mask = np.expand_dims(blend_mask, axis=2)
-
-#         final_output = blend_mask * person_np + (1 - blend_mask) * output_img
-#         final_output = (final_output * 255).astype(np.uint8)
-
-#         return Image.fromarray(final_output)
+# ----------------- Generate Try-On -----------------
 def generate_tryon_output(person_img, agnostic_img, cloth_img, segmentation, model, model_type):
     if model_type == "UNet":
         img_transform = transforms.Compose([
@@ -212,32 +142,92 @@ def generate_tryon_output(person_img, agnostic_img, cloth_img, segmentation, model, model_type):
         final_output = blend_mask * person_np + (1 - blend_mask) * output_img
         final_output = (final_output * 255).astype(np.uint8)
         return Image.fromarray(final_output)
-#new changes end
-

-# -----------------
+# ----------------- Traditional CV Pipeline -----------------
+def create_agnostic_traditional(person_np, label_np):
+    mask = (label_np == 4).astype(np.uint8)
+    kernel = np.ones((7, 7), np.uint8)
+    dilated = cv2.dilate(mask, kernel, iterations=2)
+    agnostic = person_np.copy()
+    agnostic[dilated == 1] = [128, 128, 128]
+    return agnostic, dilated
+
+def improved_warp_cloth(cloth_np, person_np, label_np):
+    mask = (label_np == 4).astype(np.uint8) * 255
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if not contours:
+        return cloth_np
+
+    cnt = max(contours, key=cv2.contourArea)
+    x, y, w, h = cv2.boundingRect(cnt)
+    src_h, src_w = cloth_np.shape[:2]
+    src_points = np.array([[0,0],[src_w-1,0],[src_w-1,src_h-1],[0,src_h-1]], dtype=np.float32)
+    padding_x, padding_y = int(w*0.05), int(h*0.05)
+    dst_points = np.array([
+        [max(0, x - padding_x), max(0, y - padding_y)],
+        [min(person_np.shape[1] - 1, x + w + padding_x), max(0, y - padding_y)],
+        [min(person_np.shape[1] - 1, x + w + padding_x), min(person_np.shape[0] - 1, y + h + padding_y)],
+        [max(0, x - padding_x), min(person_np.shape[0] - 1, y + h + padding_y)]
+    ], dtype=np.float32)
+    M = cv2.getPerspectiveTransform(src_points, dst_points)
+    warped = cv2.warpPerspective(cloth_np, M, (person_np.shape[1], person_np.shape[0]), borderMode=cv2.BORDER_CONSTANT)
+    return warped
+
+def improved_blend_traditional(agnostic_np, warped_cloth_np, label_np):
+    target_mask = (label_np == 4).astype(np.uint8)
+    kernel = np.ones((9, 9), np.uint8)
+    target_mask = cv2.dilate(target_mask, kernel, iterations=2) * 255
+
+    gray = cv2.cvtColor(warped_cloth_np, cv2.COLOR_BGR2GRAY)
+    _, cloth_mask = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
+    combined_mask = cv2.bitwise_and(target_mask, cloth_mask)
+    combined_mask = cv2.GaussianBlur(combined_mask, (5, 5), 0)
+
+    M = cv2.moments(combined_mask)
+    center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"])) if M["m00"] != 0 else (96, 128)
+
+    try:
+        output = cv2.seamlessClone(warped_cloth_np, agnostic_np, combined_mask, center, cv2.NORMAL_CLONE)
+    except:
+        mask_3d = np.stack([combined_mask / 255.0] * 3, axis=2)
+        output = warped_cloth_np * mask_3d + agnostic_np * (1 - mask_3d)
+        output = output.astype(np.uint8)
+    return output
+
+# ----------------- Main Pipeline -----------------
 def virtual_tryon(person_image, cloth_image, model_type):
+    if model_type == "Traditional":
+        person_np = np.array(person_image.resize((192, 256)))[:, :, ::-1]
+        cloth_np = np.array(cloth_image.resize((192, 256)))[:, :, ::-1]
+        segmentation = get_segmentation(person_image)
+        label_np = cv2.resize(segmentation.astype(np.uint8), (192, 256), interpolation=cv2.INTER_NEAREST)
+
+        agnostic_np, _ = create_agnostic_traditional(person_np, label_np)
+        warped_cloth = improved_warp_cloth(cloth_np, person_np, label_np)
+        output_np = improved_blend_traditional(agnostic_np, warped_cloth, label_np)
+
+        return Image.fromarray(agnostic_np[:, :, ::-1]), Image.fromarray(output_np[:, :, ::-1])
+
     segmentation = get_segmentation(person_image)
     agnostic = generate_agnostic(person_image, segmentation)
     model = load_model(model_type)
     result = generate_tryon_output(person_image, agnostic, cloth_image, segmentation, model, model_type)
-    # result = generate_tryon_output(person_image, agnostic, cloth_image, segmentation, model)
     return agnostic, result

-# ----------------- Gradio
+# ----------------- Gradio UI -----------------
 demo = gr.Interface(
     fn=virtual_tryon,
     inputs=[
         gr.Image(type="pil", label="Person Image"),
         gr.Image(type="pil", label="Cloth Image"),
-        gr.Radio(choices=["UNet", "GAN", "Diffusion"], label="Model Type", value="UNet")
+        gr.Radio(choices=["UNet", "GAN", "Diffusion", "Traditional"], label="Model Type", value="UNet")
     ],
     outputs=[
         gr.Image(type="pil", label="Agnostic (Torso Masked)"),
         gr.Image(type="pil", label="Virtual Try-On Output")
     ],
     title="👕 Virtual Try-On App",
-    description="Upload a person image and a clothing image, select a model (UNet, GAN, Diffusion), and try it on virtually."
+    description="Upload a person image and a clothing image, select a model (UNet, GAN, Diffusion, Traditional), and try it on virtually."
 )

 if __name__ == "__main__":
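
Note: the hunks above rely on globals defined earlier in app.py that fall outside the changed regions (processor, device, and the segmentation model called inside get_segmentation), and the closing if __name__ == "__main__": block is cut off by the last hunk. The sketch below is a minimal, assumed version of that scaffolding, not part of this commit: the checkpoint name mattmdjaga/segformer_b2_clothes and the variable name seg_model are guesses, chosen because label 4 in that checkpoint is "Upper-clothes", which matches the hard-coded label_np == 4 in the Traditional pipeline.

# Hypothetical scaffolding, NOT shown in this diff: imports, device setup, and a
# clothes-parsing SegFormer that would make the functions above runnable.
import torch
import numpy as np
import cv2
import gradio as gr
from PIL import Image
from torchvision import transforms
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# "processor" is the name used by get_segmentation(); "seg_model" is a guessed name.
processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes")
seg_model = SegformerForSemanticSegmentation.from_pretrained(
    "mattmdjaga/segformer_b2_clothes"
).to(device)
seg_model.eval()

# The hidden middle of get_segmentation() presumably resembles:
#     outputs = seg_model(**inputs)
#     logits = outputs.logits
# before the argmax shown in the diff.

# Typical closing block for a Gradio Space (the diff ends mid-statement):
# if __name__ == "__main__":
#     demo.launch()

# Quick local smoke test of the Traditional branch (file names are placeholders):
# person = Image.open("person.jpg").convert("RGB")
# cloth = Image.open("cloth.jpg").convert("RGB")
# agnostic, result = virtual_tryon(person, cloth, "Traditional")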