Spaces:

axelhortua
/

objectlocalization

Sleeping

App Files Files Community

Alex Hortua committed on Feb 26

Commit

86e22bf

1 Parent(s): 18a6cb8

Adding Skeleton for detection

Browse files

Files changed (12) hide show

.gitignore +2 -1
README.md +1 -1
plots/resnet50.py +36 -0
plots/training_progress.png +0 -0
requirements.txt +5 -0
src/__pycache__/dataset.cpython-312.pyc +0 -0
src/__pycache__/inference.cpython-312.pyc +0 -0
src/app.py +33 -0
src/dataset.py +57 -0
src/freezing_train.py +52 -0
src/inference.py +30 -0
src/train.py +112 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 /Images
 /Annotations
-.qodo

 /Images
 /Annotations
+.qodo
+venv/

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: blue
 colorTo: green
 sdk: gradio
 sdk_version: 5.18.0
-app_file: app.py
 pinned: false
 short_description: Using RCNN and Fully connected to detect Planes in objects
 ---

 colorTo: green
 sdk: gradio
 sdk_version: 5.18.0
+app_file: src/app.py
 pinned: false
 short_description: Using RCNN and Fully connected to detect Planes in objects
 ---

plots/resnet50.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import matplotlib.pyplot as plt
+import networkx as nx
+# Create a directed graph to visualize ResNet-50 flow
+G = nx.DiGraph()
+# Add nodes for each processing step
+G.add_node("Input Image", color="lightblue")
+G.add_node("Conv + Pooling (Basic Feature Extraction)", color="blue")
+G.add_node("Residual Block 1 (Basic Shapes & Textures)", color="green")
+G.add_node("Residual Block 2 (Mid-Level Features)", color="green")
+G.add_node("Residual Block 3 (Complex Features & Object Parts)", color="green")
+G.add_node("Residual Block 4 (Full Object Representation)", color="green")
+G.add_node("Global Average Pooling (Summarizing Features)", color="orange")
+G.add_node("Fully Connected Layer (Classification)", color="red")
+G.add_node("Output: Object Label (Dog, Plane, etc.)", color="yellow")
+# Connect nodes to show data flow
+edges = [
+    ("Input Image", "Conv + Pooling (Basic Feature Extraction)"),
+    ("Conv + Pooling (Basic Feature Extraction)", "Residual Block 1 (Basic Shapes & Textures)"),
+    ("Residual Block 1 (Basic Shapes & Textures)", "Residual Block 2 (Mid-Level Features)"),
+    ("Residual Block 2 (Mid-Level Features)", "Residual Block 3 (Complex Features & Object Parts)"),
+    ("Residual Block 3 (Complex Features & Object Parts)", "Residual Block 4 (Full Object Representation)"),
+    ("Residual Block 4 (Full Object Representation)", "Global Average Pooling (Summarizing Features)"),
+    ("Global Average Pooling (Summarizing Features)", "Fully Connected Layer (Classification)"),
+    ("Fully Connected Layer (Classification)", "Output: Object Label (Dog, Plane, etc.)")
+]
+G.add_edges_from(edges)
+# Draw the model architecture
+plt.figure(figsize=(10, 6))
+nx.draw(G, with_labels=True, node_color="lightblue", node_size=3000, edge_color="gray", font_size=8, font_weight="bold")
+plt.title("📌 Visualization of ResNet-50 Image Processing Flow")
+plt.show()

plots/training_progress.png ADDED Viewed

requirements.txt CHANGED Viewed

	@@ -0,0 +1,5 @@

+torch
+torchvision
+pandas
+gradio
+Pillow

src/__pycache__/dataset.cpython-312.pyc ADDED Viewed

Binary file (4.26 kB). View file

src/__pycache__/inference.cpython-312.pyc ADDED Viewed

Binary file (2.06 kB). View file

src/app.py CHANGED Viewed

	@@ -0,0 +1,33 @@

+import gradio as gr
+from inference import detect_planes
+from PIL import Image, ImageDraw
+def draw_boxes(image_path):
+    prediction = detect_planes(image_path)
+    image = Image.open(image_path).convert("RGB")
+    draw = ImageDraw.Draw(image)
+    print(prediction)
+    for i in range(len(prediction[0]["boxes"])):
+        box = prediction[0]["boxes"][i].cpu().numpy()
+        score = prediction[0]["scores"][i].item()
+        print(score)
+        if score > 0.1:  # Confidence threshold
+            draw.rectangle([(box[0], box[1]), (box[2], box[3])], outline="red", width=3)
+            draw.text((box[0], box[1] - 10), f"Plane {score:.2f}", fill="red")
+    return image
+# Create Gradio UI
+demo = gr.Interface(
+    fn=draw_boxes,
+    inputs=gr.Image(type="filepath"),
+    outputs=gr.Image(),
+    title="Plane Detector",
+    description="Upload an image, and the model will detect planes."
+)
+if __name__ == "__main__":
+    demo.launch()

src/dataset.py CHANGED Viewed

	@@ -0,0 +1,57 @@

+import os
+import pandas as pd
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from torch.utils.data import Dataset
+class PlaneDataset(Dataset):
+    def __init__(self, images_folder, annotations_folder, transform=None):
+        self.images_folder = images_folder
+        self.annotations_folder = annotations_folder
+        self.transform = transform or T.ToTensor()
+        self.image_filenames = [f for f in os.listdir(images_folder) if f.endswith(".jpg")]
+    def __len__(self):
+        return len(self.image_filenames)
+    def __getitem__(self, idx):
+        img_filename = self.image_filenames[idx]
+        img_path = os.path.join(self.images_folder, img_filename)
+        # Load and convert image
+        image = Image.open(img_path).convert("RGB")
+        image = self.transform(image)
+        # Read bounding boxes from CSV
+        annotation_file = os.path.join(self.annotations_folder, img_filename.replace(".jpg", ".csv"))
+        if not os.path.exists(annotation_file) or os.path.getsize(annotation_file) == 0:
+            print(f"⚠️ Warning: Annotation file {annotation_file} is missing or empty!")
+            return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}
+        try:
+            bboxes_df = pd.read_csv(annotation_file, header=None, skiprows=1, sep=r"\s+")
+            # Check if valid bounding boxes exist (at least 4 values per row)
+            if bboxes_df.shape[1] != 4:
+                print(f"⚠️ Warning: Invalid bounding boxes in {annotation_file}, skipping...")
+                return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}
+            bboxes_df.columns = ["xmin", "ymin", "xmax", "ymax"]
+            boxes = torch.tensor(bboxes_df[["xmin", "ymin", "xmax", "ymax"]].values, dtype=torch.float32)
+            labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
+        except Exception as e:
+            print(f"❌ Error reading CSV {annotation_file}: {e}")
+            return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}
+        target = {"boxes": boxes, "labels": labels}
+        return image, target
+transform = T.Compose([
+    T.Resize((512, 512)),
+    T.ToTensor()
+])
+dataset = PlaneDataset(images_folder="Images", annotations_folder="Annotations", transform=transform)

src/freezing_train.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import torch
+import torchvision.models as models
+import torch.optim as optim
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
+from torch.utils.data import DataLoader
+from src.dataset import PlaneDataset, transform
+# Load dataset
+dataset = PlaneDataset("Images", "Annotations", transform=transform)
+dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
+# Load pre-trained Faster R-CNN model
+model = models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
+# Replace the classifier for detecting planes
+num_classes = 2  # 1 for plane + 1 for background
+in_features = model.roi_heads.box_predictor.cls_score.in_features
+model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
+# Move model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# ✅ **Freeze Backbone (Feature Extractor)**
+for param in model.backbone.parameters():
+    param.requires_grad = False  # Prevents updating backbone layers
+# Train only the detection head (Region Proposal + Classifier)
+optimizer = optim.Adam(model.roi_heads.parameters(), lr=0.0001)
+# Training loop
+num_epochs = 5
+for epoch in range(num_epochs):
+    model.train()
+    total_loss = 0
+    for images, targets in dataloader:
+        images = [img.to(device) for img in images]
+        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+        optimizer.zero_grad()
+        loss_dict = model(images, targets)
+        loss = sum(loss for loss in loss_dict.values())
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {total_loss:.4f}")
+# Save model
+torch.save(model.state_dict(), "models/frozen_plane_detector.pth")

src/inference.py CHANGED Viewed

	@@ -0,0 +1,30 @@

+import torch
+import torchvision.transforms as T
+from PIL import Image
+import torchvision.models as models
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+# Load model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
+num_classes = 2
+in_features = model.roi_heads.box_predictor.cls_score.in_features
+model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+model.load_state_dict(torch.load("models/plane_detector.pth", map_location=device))
+model.to(device)
+model.eval()
+transform = T.Compose([
+    T.Resize((512, 512)),
+    T.ToTensor()
+])
+def detect_planes(image_path):
+    image = Image.open(image_path).convert("RGB")
+    image_tensor = transform(image).unsqueeze(0).to(device)
+    with torch.no_grad():
+        prediction = model(image_tensor)
+    return prediction

src/train.py CHANGED Viewed

	@@ -0,0 +1,112 @@

+import torch
+import torchvision.models as models
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
+from torch.utils.data import DataLoader
+import torch.optim as optim
+import matplotlib.pyplot as plt
+from dataset import PlaneDataset, transform
+from torchvision.ops import box_iou
+import numpy as np
+# Load dataset
+dataset = PlaneDataset("Images", "Annotations", transform=transform)
+dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
+# Load pre-trained Faster R-CNN model
+model = models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
+# Replace the classifier for detecting planes
+num_classes = 2  # 1 for plane + 1 for background
+in_features = model.roi_heads.box_predictor.cls_score.in_features
+model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
+# Move model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+optimizer = optim.Adam(model.parameters(), lr=0.0001)
+# Track statistics
+train_losses = []
+mAPs = []
+# Function to compute mAP (mean Average Precision)
+def compute_mAP(model, dataloader, device):
+    model.eval()
+    iou_threshold = 0.5
+    all_precisions = []
+    with torch.no_grad():
+        for images, targets in dataloader:
+            images = [img.to(device) for img in images]
+            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+            preds = model(images)
+            for pred, target in zip(preds, targets):
+                pred_boxes = pred["boxes"]
+                pred_scores = pred["scores"]
+                gt_boxes = target["boxes"]
+                if len(pred_boxes) == 0 or len(gt_boxes) == 0:
+                    continue
+                ious = box_iou(pred_boxes, gt_boxes)
+                correct = (ious.max(dim=1).values > iou_threshold).float()
+                precision = correct.sum() / max(len(pred_boxes), 1)
+                all_precisions.append(precision.item())
+    return np.mean(all_precisions) if all_precisions else 0.0
+# Training loop with statistics logging
+num_epochs = 5
+plt.ion()  # Turn on interactive mode for live plotting
+for epoch in range(num_epochs):
+    model.train()
+    total_loss = 0
+    for images, targets in dataloader:
+        images = [img.to(device) for img in images]
+        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+        optimizer.zero_grad()
+        loss_dict = model(images, targets)
+        loss = sum(loss for loss in loss_dict.values())
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+    # Compute and log statistics
+    avg_loss = total_loss / len(dataloader)
+    train_losses.append(avg_loss)
+    mAP = compute_mAP(model, dataloader, device)
+    mAPs.append(mAP)
+    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | mAP: {mAP:.4f}")
+    # Live Plot Training Progress
+    plt.figure(figsize=(10, 5))
+    plt.clf()
+    plt.subplot(1, 2, 1)
+    plt.plot(train_losses, label="Loss")
+    plt.xlabel("Epoch")
+    plt.ylabel("Loss")
+    plt.legend()
+    plt.title("Training Loss")
+    plt.subplot(1, 2, 2)
+    plt.plot(mAPs, label="mAP")
+    plt.xlabel("Epoch")
+    plt.ylabel("mAP")
+    plt.legend()
+    plt.title("Mean Average Precision")
+    plt.pause(0.1)
+# Save model
+torch.save(model.state_dict(), "models/plane_detector.pth")
+plt.ioff()  # Turn off interactive mode
+plt.show()
+plt.savefig("plots/training_progress.png") # Show final plots