Alex Hortua committed on
Commit
86e22bf
·
1 Parent(s): 18a6cb8

Adding skeleton for detection

Browse files
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  /Images
2
  /Annotations
3
- .qodo
 
 
1
  /Images
2
  /Annotations
3
+ .qodo
4
+ venv/
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.18.0
8
- app_file: app.py
9
  pinned: false
10
  short_description: Using RCNN and Fully connected to detect Planes in objects
11
  ---
 
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.18.0
8
+ app_file: src/app.py
9
  pinned: false
10
  short_description: Using RCNN and Fully connected to detect Planes in objects
11
  ---
plots/resnet50.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import networkx as nx
3
+
4
# Build a directed graph that visualizes the ResNet-50 processing flow.
G = nx.DiGraph()

# Processing stages in data-flow order, each paired with its display colour.
stages = [
    ("Input Image", "lightblue"),
    ("Conv + Pooling (Basic Feature Extraction)", "blue"),
    ("Residual Block 1 (Basic Shapes & Textures)", "green"),
    ("Residual Block 2 (Mid-Level Features)", "green"),
    ("Residual Block 3 (Complex Features & Object Parts)", "green"),
    ("Residual Block 4 (Full Object Representation)", "green"),
    ("Global Average Pooling (Summarizing Features)", "orange"),
    ("Fully Connected Layer (Classification)", "red"),
    ("Output: Object Label (Dog, Plane, etc.)", "yellow"),
]
for label, color in stages:
    G.add_node(label, color=color)

# The flow is a simple chain: every stage feeds the one listed after it.
labels = [label for label, _ in stages]
edges = list(zip(labels, labels[1:]))
G.add_edges_from(edges)

# Draw the model architecture. The colour stored on each node is passed to
# the drawing call explicitly; a single hard-coded colour would ignore the
# per-node "color" attribute set above.
node_colors = [G.nodes[node]["color"] for node in G.nodes]
plt.figure(figsize=(10, 6))
nx.draw(
    G,
    with_labels=True,
    node_color=node_colors,
    node_size=3000,
    edge_color="gray",
    font_size=8,
    font_weight="bold",
)
plt.title("📌 Visualization of ResNet-50 Image Processing Flow")
plt.show()
plots/training_progress.png ADDED
requirements.txt CHANGED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ pandas
4
+ gradio
5
+ Pillow
src/__pycache__/dataset.cpython-312.pyc ADDED
Binary file (4.26 kB). View file
 
src/__pycache__/inference.cpython-312.pyc ADDED
Binary file (2.06 kB). View file
 
src/app.py CHANGED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from inference import detect_planes
3
+ from PIL import Image, ImageDraw
4
+
5
def draw_boxes(image_path, score_threshold=0.1):
    """Run plane detection on an image file and draw the detections on it.

    Args:
        image_path: Path of the image to analyse (Gradio passes a file path).
        score_threshold: Detections whose score is not strictly greater than
            this value are not drawn. Defaults to the original 0.1.

    Returns:
        A PIL RGB image with a red rectangle and a "Plane <score>" label for
        every detection above the threshold. Boxes are drawn at the
        coordinates returned by ``detect_planes``.
    """
    prediction = detect_planes(image_path)

    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)

    # Only one image is submitted, so only the first result dict is used.
    detections = prediction[0]
    # tolist() yields plain Python floats, so no NumPy conversion is needed.
    boxes = detections["boxes"].cpu().tolist()
    scores = detections["scores"].cpu().tolist()

    for (xmin, ymin, xmax, ymax), score in zip(boxes, scores):
        if score <= score_threshold:
            continue
        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)
        # Clamp the label so it stays on the canvas for boxes touching the top.
        draw.text((xmin, max(ymin - 10, 0)), f"Plane {score:.2f}", fill="red")

    return image
22
+
23
# Gradio front end: the uploaded image is handed to draw_boxes as a file
# path and the annotated image it returns is displayed.
_image_input = gr.Image(type="filepath")
_image_output = gr.Image()

demo = gr.Interface(
    fn=draw_boxes,
    inputs=_image_input,
    outputs=_image_output,
    title="Plane Detector",
    description="Upload an image, and the model will detect planes.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
src/dataset.py CHANGED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import torch
4
+ import torchvision.transforms as T
5
+ from PIL import Image
6
+ from torch.utils.data import Dataset
7
+
8
class PlaneDataset(Dataset):
    """Detection dataset pairing ``.jpg`` images with per-image box files.

    Every image ``<name>.jpg`` in ``images_folder`` is matched with
    ``<name>.csv`` in ``annotations_folder``. The annotation file is read as
    whitespace-separated ``xmin ymin xmax ymax`` rows after skipping its first
    line. Each item is ``(image, target)`` where ``target`` is a dict with
    ``"boxes"`` (float32, N x 4) and ``"labels"`` (int64, N, all ones).
    Images whose annotation is missing, empty or unreadable yield an empty
    target instead of raising.
    """

    def __init__(self, images_folder, annotations_folder, transform=None):
        """Index the ``.jpg`` files of ``images_folder``.

        Args:
            images_folder: Directory containing the images.
            annotations_folder: Directory containing the matching CSV files.
            transform: Callable applied to the PIL image; defaults to
                ``T.ToTensor()``.
        """
        self.images_folder = images_folder
        self.annotations_folder = annotations_folder
        self.transform = transform or T.ToTensor()
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping reproducible between runs.
        self.image_filenames = sorted(
            f for f in os.listdir(images_folder) if f.endswith(".jpg")
        )

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_filenames)

    @staticmethod
    def _empty_target():
        """Return a target dict describing an image without any boxes."""
        return {
            "boxes": torch.empty((0, 4), dtype=torch.float32),
            "labels": torch.empty((0,), dtype=torch.int64),
        }

    @staticmethod
    def _image_size(image):
        """Return ``(width, height)`` of a tensor or PIL image, else ``None``."""
        if isinstance(image, torch.Tensor) and image.dim() >= 2:
            height, width = image.shape[-2:]
            return int(width), int(height)
        size = getattr(image, "size", None)
        if isinstance(size, tuple) and len(size) == 2:
            return size
        return None

    @staticmethod
    def _scale_boxes(boxes, original_size, new_size):
        """Map ``xmin, ymin, xmax, ymax`` boxes between two image sizes.

        Args:
            boxes: Tensor of shape (N, 4).
            original_size: ``(width, height)`` the boxes refer to.
            new_size: ``(width, height)`` the boxes should refer to.

        Returns:
            The rescaled boxes (the input itself when nothing changes).
        """
        orig_w, orig_h = original_size
        new_w, new_h = new_size
        if (orig_w, orig_h) == (new_w, new_h) or boxes.numel() == 0:
            return boxes
        x_scale = new_w / orig_w
        y_scale = new_h / orig_h
        return boxes * boxes.new_tensor([x_scale, y_scale, x_scale, y_scale])

    def __getitem__(self, idx):
        """Load image ``idx`` and its boxes.

        Returns:
            ``(image, target)``; ``image`` is the transformed image and
            ``target`` the dict described in the class docstring. Boxes are
            rescaled to the transformed image size so they stay aligned when
            the transform resizes the image.
        """
        img_filename = self.image_filenames[idx]
        img_path = os.path.join(self.images_folder, img_filename)

        # Load and convert image; the context manager closes the file handle.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        original_size = image.size  # (width, height) before any transform
        image = self.transform(image)

        # Swap only the extension; str.replace would rewrite every ".jpg"
        # occurrence in the file name.
        stem = os.path.splitext(img_filename)[0]
        annotation_file = os.path.join(self.annotations_folder, stem + ".csv")

        if not os.path.exists(annotation_file) or os.path.getsize(annotation_file) == 0:
            print(f"⚠️ Warning: Annotation file {annotation_file} is missing or empty!")
            return image, self._empty_target()

        try:
            bboxes_df = pd.read_csv(annotation_file, header=None, skiprows=1, sep=r"\s+")

            # Every row must hold exactly four values.
            if bboxes_df.shape[1] != 4:
                print(f"⚠️ Warning: Invalid bounding boxes in {annotation_file}, skipping...")
                return image, self._empty_target()

            boxes = torch.tensor(bboxes_df.values, dtype=torch.float32)
        except (ValueError, TypeError, OSError) as e:
            # pandas parser/empty-data errors are ValueError subclasses;
            # non-numeric cells surface as TypeError/ValueError in torch.tensor.
            print(f"❌ Error reading CSV {annotation_file}: {e}")
            return image, self._empty_target()

        # Keep the boxes aligned with the image when the transform resized it.
        new_size = self._image_size(image)
        if new_size is not None:
            boxes = self._scale_boxes(boxes, original_size, new_size)

        # Drop boxes without positive width and height before they reach the
        # detection model.
        valid = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        if not bool(valid.all()):
            print(f"⚠️ Warning: Dropping degenerate bounding boxes in {annotation_file}")
            boxes = boxes[valid]

        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
        return image, {"boxes": boxes, "labels": labels}
51
+
52
# Shared preprocessing: resize every image to 512x512 and convert it to a
# tensor.
transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor(),
])

# Convenience dataset over the default folders. It is only built when the
# "Images" folder exists, so importing this module (e.g. for PlaneDataset or
# transform) does not raise on a checkout without the data; in that case
# ``dataset`` is None.
dataset = (
    PlaneDataset(images_folder="Images", annotations_folder="Annotations", transform=transform)
    if os.path.isdir("Images")
    else None
)
src/freezing_train.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.models as models
3
+ import torch.optim as optim
4
+ from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
5
+ from torch.utils.data import DataLoader
6
+ from src.dataset import PlaneDataset, transform
7
+
8
import os  # used below to make sure the checkpoint directory exists

# Load dataset
dataset = PlaneDataset("Images", "Annotations", transform=transform)
# Each sample carries its own number of boxes, so a batch is kept as a tuple
# of images and a tuple of targets instead of being stacked.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load pre-trained Faster R-CNN model
model = models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the classifier for detecting planes
num_classes = 2  # 1 for plane + 1 for background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Freeze the backbone (feature extractor) so its weights are never updated.
for param in model.backbone.parameters():
    param.requires_grad = False

# Train everything that is still trainable, i.e. the region proposal network
# and the ROI heads. Passing only model.roi_heads.parameters() would leave the
# region proposal network out of the optimizer.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.0001)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for images, targets in dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the per-batch average so the figure is comparable with train.py
    # and does not depend on the dataset size.
    avg_loss = total_loss / max(len(dataloader), 1)
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f}")

# Save model; create the target directory first so a finished run is not lost.
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), "models/frozen_plane_detector.pth")
src/inference.py CHANGED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.transforms as T
3
+ from PIL import Image
4
+ import torchvision.models as models
5
+ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
6
+
7
# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Build the architecture without detection weights; they come from the
# checkpoint below. ``weights=None`` replaces the deprecated
# ``pretrained=False`` and matches the weights API used by the training
# scripts.
# NOTE(review): ``weights_backbone`` is deliberately left at its default;
# presumably this keeps the same backbone layer types as at training time so
# the checkpoint keys match. Confirm before changing it.
model = models.detection.fasterrcnn_resnet50_fpn(weights=None)
num_classes = 2  # same head size as used for training (plane + background)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained parameters onto the selected device and switch to
# inference mode.
model.load_state_dict(torch.load("models/plane_detector.pth", map_location=device))
model.to(device)
model.eval()

# Preprocessing applied to every input image before it is fed to the model.
transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor(),
])
22
+
23
def detect_planes(image_path):
    """Detect planes in the image stored at ``image_path``.

    The image is resized by the module-level ``transform`` before inference,
    so the raw boxes refer to the resized frame. They are mapped back to the
    original image size before being returned, which lets callers draw them
    directly on the original image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The model output: a list with one dict holding ``"boxes"`` (in
        original-image pixel coordinates), ``"labels"`` and ``"scores"``.
    """
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    orig_w, orig_h = image.size

    image_tensor = transform(image).unsqueeze(0).to(device)
    in_h, in_w = image_tensor.shape[-2:]

    with torch.no_grad():
        prediction = model(image_tensor)

    # Undo the resize: scale x by the width ratio and y by the height ratio.
    x_scale = orig_w / in_w
    y_scale = orig_h / in_h
    for detections in prediction:
        boxes = detections["boxes"]
        detections["boxes"] = boxes * boxes.new_tensor([x_scale, y_scale, x_scale, y_scale])

    return prediction
src/train.py CHANGED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.models as models
3
+ from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
4
+ from torch.utils.data import DataLoader
5
+ import torch.optim as optim
6
+ import matplotlib.pyplot as plt
7
+ from dataset import PlaneDataset, transform
8
+ from torchvision.ops import box_iou
9
+ import numpy as np
10
+
11
# Data: plane images with their box annotations. A batch is kept as a tuple
# of images and a tuple of targets because samples are not stacked.
dataset = PlaneDataset("Images", "Annotations", transform=transform)
dataloader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

# Model: Faster R-CNN initialised from the default pre-trained weights.
model = models.detection.fasterrcnn_resnet50_fpn(
    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)

# Swap the box predictor for one sized for this task.
num_classes = 2  # plane + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(
    in_features, num_classes
)

# Prefer the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# All model parameters are optimised.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Per-epoch history used for logging and plotting.
train_losses, mAPs = [], []
32
+
33
def compute_mAP(model, dataloader, device):
    """Return the mean per-image detection precision at IoU 0.5.

    Despite its name this is a simplified metric, not a full mean Average
    Precision: for every image the fraction of predicted boxes that match a
    ground-truth box is computed, and those fractions are averaged.

    A prediction is a true positive when its best still-unmatched
    ground-truth box overlaps it with IoU > 0.5; each ground-truth box can be
    matched once, so duplicate detections of one object are not all counted
    as correct. Images with predictions but no ground truth score 0 (every
    prediction is a false positive). Images without predictions are skipped
    because their precision is undefined.

    Args:
        model: Detection model returning dicts with "boxes" and "scores".
        dataloader: Iterable yielding ``(images, targets)`` batches.
        device: Device the images and boxes are moved to.

    Returns:
        The mean precision as a float, or 0.0 when no image was scored.
    """
    iou_threshold = 0.5
    all_precisions = []

    # Remember the current mode so the caller's model is left as it was found.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, targets in dataloader:
                images = [img.to(device) for img in images]
                preds = model(images)

                for pred, target in zip(preds, targets):
                    pred_boxes = pred["boxes"]
                    gt_boxes = target["boxes"].to(device)

                    if len(pred_boxes) == 0:
                        continue
                    if len(gt_boxes) == 0:
                        all_precisions.append(0.0)
                        continue

                    # Let the most confident predictions claim boxes first.
                    order = pred["scores"].argsort(descending=True)
                    ious = box_iou(pred_boxes[order], gt_boxes)

                    gt_taken = torch.zeros(len(gt_boxes), dtype=torch.bool, device=ious.device)
                    true_positives = 0
                    for row in ious:
                        # Hide ground-truth boxes that are already matched.
                        best_iou, best_gt = row.masked_fill(gt_taken, -1.0).max(dim=0)
                        if best_iou.item() > iou_threshold:
                            gt_taken[best_gt] = True
                            true_positives += 1

                    all_precisions.append(true_positives / len(pred_boxes))
    finally:
        model.train(was_training)

    return float(np.mean(all_precisions)) if all_precisions else 0.0
60
+
61
import os  # used below to make sure the output directories exist

# Training loop with statistics logging
num_epochs = 5
plt.ion()  # Turn on interactive mode for live plotting
# A single figure is reused for every epoch; opening a new one per epoch
# would pile up figures instead of updating the plot.
fig = plt.figure(figsize=(10, 5))

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for images, targets in dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Compute and log statistics
    avg_loss = total_loss / max(len(dataloader), 1)
    train_losses.append(avg_loss)
    # NOTE: the metric is computed on the training dataloader; this script
    # has no separate validation split.
    mAP = compute_mAP(model, dataloader, device)
    mAPs.append(mAP)

    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | mAP: {mAP:.4f}")

    # Live plot of the training progress: redraw both panels in place.
    fig.clf()
    loss_ax = fig.add_subplot(1, 2, 1)
    loss_ax.plot(train_losses, label="Loss")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend()
    loss_ax.set_title("Training Loss")

    map_ax = fig.add_subplot(1, 2, 2)
    map_ax.plot(mAPs, label="mAP")
    map_ax.set_xlabel("Epoch")
    map_ax.set_ylabel("mAP")
    map_ax.legend()
    map_ax.set_title("Mean Average Precision")

    plt.pause(0.1)

# Save model
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), "models/plane_detector.pth")

# Write the figure to disk before showing it: once the blocking show() call
# returns the figure may already be closed, and saving afterwards can produce
# an empty image.
os.makedirs("plots", exist_ok=True)
fig.savefig("plots/training_progress.png")

plt.ioff()  # Turn off interactive mode
plt.show()  # Show final plots