Commit 86e22bf
Alex Hortua committed
Parent(s): 18a6cb8

Adding Skeleton for detection
Files changed:
- .gitignore +2 -1
- README.md +1 -1
- plots/resnet50.py +36 -0
- plots/training_progress.png +0 -0
- requirements.txt +5 -0
- src/__pycache__/dataset.cpython-312.pyc +0 -0
- src/__pycache__/inference.cpython-312.pyc +0 -0
- src/app.py +33 -0
- src/dataset.py +57 -0
- src/freezing_train.py +52 -0
- src/inference.py +30 -0
- src/train.py +112 -0
.gitignore
CHANGED
@@ -1,3 +1,4 @@
 /Images
 /Annotations
-.qodo
+.qodo
+venv/
README.md
CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
 colorTo: green
 sdk: gradio
 sdk_version: 5.18.0
-app_file: app.py
+app_file: src/app.py
 pinned: false
 short_description: Using RCNN and Fully connected to detect Planes in objects
 ---
plots/resnet50.py
ADDED
@@ -0,0 +1,36 @@
import matplotlib.pyplot as plt
import networkx as nx

# Create a directed graph to visualize ResNet-50 flow
G = nx.DiGraph()

# Add nodes for each processing step
G.add_node("Input Image", color="lightblue")
G.add_node("Conv + Pooling (Basic Feature Extraction)", color="blue")
G.add_node("Residual Block 1 (Basic Shapes & Textures)", color="green")
G.add_node("Residual Block 2 (Mid-Level Features)", color="green")
G.add_node("Residual Block 3 (Complex Features & Object Parts)", color="green")
G.add_node("Residual Block 4 (Full Object Representation)", color="green")
G.add_node("Global Average Pooling (Summarizing Features)", color="orange")
G.add_node("Fully Connected Layer (Classification)", color="red")
G.add_node("Output: Object Label (Dog, Plane, etc.)", color="yellow")

# Connect nodes to show data flow
edges = [
    ("Input Image", "Conv + Pooling (Basic Feature Extraction)"),
    ("Conv + Pooling (Basic Feature Extraction)", "Residual Block 1 (Basic Shapes & Textures)"),
    ("Residual Block 1 (Basic Shapes & Textures)", "Residual Block 2 (Mid-Level Features)"),
    ("Residual Block 2 (Mid-Level Features)", "Residual Block 3 (Complex Features & Object Parts)"),
    ("Residual Block 3 (Complex Features & Object Parts)", "Residual Block 4 (Full Object Representation)"),
    ("Residual Block 4 (Full Object Representation)", "Global Average Pooling (Summarizing Features)"),
    ("Global Average Pooling (Summarizing Features)", "Fully Connected Layer (Classification)"),
    ("Fully Connected Layer (Classification)", "Output: Object Label (Dog, Plane, etc.)")
]

G.add_edges_from(edges)

# Draw the model architecture
plt.figure(figsize=(10, 6))
nx.draw(G, with_labels=True, node_color="lightblue", node_size=3000, edge_color="gray", font_size=8, font_weight="bold")
plt.title("📌 Visualization of ResNet-50 Image Processing Flow")
plt.show()
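
Note that the draw call above passes a single node_color="lightblue", so the per-node "color" attributes assigned earlier are never used. A small variant that applies them (same graph G assumed; networkx accepts a list of colors ordered like G.nodes):

# Collect the per-node "color" attributes and feed them to the draw call
node_colors = [G.nodes[n]["color"] for n in G.nodes]
nx.draw(G, with_labels=True, node_color=node_colors, node_size=3000,
        edge_color="gray", font_size=8, font_weight="bold")
plt.show()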
plots/training_progress.png
ADDED
Binary file
requirements.txt
CHANGED
@@ -0,0 +1,5 @@
torch
torchvision
pandas
gradio
Pillow
src/__pycache__/dataset.cpython-312.pyc
ADDED
Binary file (4.26 kB)

src/__pycache__/inference.cpython-312.pyc
ADDED
Binary file (2.06 kB)
src/app.py
CHANGED
@@ -0,0 +1,33 @@
import gradio as gr
from inference import detect_planes
from PIL import Image, ImageDraw

def draw_boxes(image_path):
    prediction = detect_planes(image_path)

    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)

    print(prediction)

    for i in range(len(prediction[0]["boxes"])):
        box = prediction[0]["boxes"][i].cpu().numpy()
        score = prediction[0]["scores"][i].item()
        print(score)
        if score > 0.1:  # Confidence threshold
            draw.rectangle([(box[0], box[1]), (box[2], box[3])], outline="red", width=3)
            draw.text((box[0], box[1] - 10), f"Plane {score:.2f}", fill="red")

    return image

# Create Gradio UI
demo = gr.Interface(
    fn=draw_boxes,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Image(),
    title="Plane Detector",
    description="Upload an image, and the model will detect planes."
)

if __name__ == "__main__":
    demo.launch()
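
One caveat: detect_planes (src/inference.py) resizes its input to 512×512 before prediction, so the returned boxes are in resized coordinates, while draw_boxes draws on the full-size original. A minimal rescaling sketch, assuming that 512×512 transform (the helper name is ours, not part of this commit):

# Map (xmin, ymin, xmax, ymax) from model-input coordinates back to the original image
def rescale_box(box, orig_w, orig_h, model_size=512):
    sx, sy = orig_w / model_size, orig_h / model_size
    return [box[0] * sx, box[1] * sy, box[2] * sx, box[3] * sy]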
src/dataset.py
CHANGED
@@ -0,0 +1,57 @@
import os
import pandas as pd
import torch
import torchvision.transforms as T
from PIL import Image
from torch.utils.data import Dataset

class PlaneDataset(Dataset):
    def __init__(self, images_folder, annotations_folder, transform=None):
        self.images_folder = images_folder
        self.annotations_folder = annotations_folder
        self.transform = transform or T.ToTensor()
        self.image_filenames = [f for f in os.listdir(images_folder) if f.endswith(".jpg")]

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_filename = self.image_filenames[idx]
        img_path = os.path.join(self.images_folder, img_filename)

        # Load and convert image
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        # Read bounding boxes from CSV
        annotation_file = os.path.join(self.annotations_folder, img_filename.replace(".jpg", ".csv"))

        if not os.path.exists(annotation_file) or os.path.getsize(annotation_file) == 0:
            print(f"⚠️ Warning: Annotation file {annotation_file} is missing or empty!")
            return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}

        try:
            bboxes_df = pd.read_csv(annotation_file, header=None, skiprows=1, sep=r"\s+")

            # Check that each row contains exactly 4 values (xmin, ymin, xmax, ymax)
            if bboxes_df.shape[1] != 4:
                print(f"⚠️ Warning: Invalid bounding boxes in {annotation_file}, skipping...")
                return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}

            bboxes_df.columns = ["xmin", "ymin", "xmax", "ymax"]
            boxes = torch.tensor(bboxes_df[["xmin", "ymin", "xmax", "ymax"]].values, dtype=torch.float32)
            labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        except Exception as e:
            print(f"❌ Error reading CSV {annotation_file}: {e}")
            return image, {"boxes": torch.empty((0, 4), dtype=torch.float32), "labels": torch.empty((0,), dtype=torch.int64)}

        target = {"boxes": boxes, "labels": labels}
        return image, target

transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor()
])

dataset = PlaneDataset(images_folder="Images", annotations_folder="Annotations", transform=transform)
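
Given the parser settings above (header=None, skiprows=1, sep=r"\s+"), each annotation file is expected to hold one header row followed by whitespace-separated xmin ymin xmax ymax rows. A usage sketch with a hypothetical annotation file, for illustration only:

# Annotations/example.csv (hypothetical) — one header row, then
# whitespace-separated xmin ymin xmax ymax per box:
#
#   xmin ymin xmax ymax
#   34 58 210 160
#   300 120 480 260
image, target = dataset[0]        # uses the module-level dataset above
print(image.shape)                # torch.Size([3, 512, 512]) after the Resize
print(target["boxes"].shape)      # torch.Size([N, 4])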
src/freezing_train.py
ADDED
@@ -0,0 +1,52 @@
import torch
import torchvision.models as models
import torch.optim as optim
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
from src.dataset import PlaneDataset, transform

# Load dataset
dataset = PlaneDataset("Images", "Annotations", transform=transform)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load pre-trained Faster R-CNN model
model = models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the classifier for detecting planes
num_classes = 2  # 1 for plane + 1 for background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# ✅ **Freeze Backbone (Feature Extractor)**
for param in model.backbone.parameters():
    param.requires_grad = False  # Prevents updating backbone layers

# Train only the detection (RoI) head; note the RPN's parameters are not in this optimizer
optimizer = optim.Adam(model.roi_heads.parameters(), lr=0.0001)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, targets in dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {total_loss:.4f}")

# Save model
torch.save(model.state_dict(), "models/frozen_plane_detector.pth")
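
Since freezing only flips requires_grad, a quick parameter count confirms the backbone is actually frozen. A sanity-check sketch that could be run right after the freeze loop (not part of this file):

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
frozen = sum(p.numel() for p in model.parameters() if not p.requires_grad)
print(f"trainable params: {trainable:,} | frozen params: {frozen:,}")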
src/inference.py
CHANGED
@@ -0,0 +1,30 @@
import torch
import torchvision.transforms as T
from PIL import Image
import torchvision.models as models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.load_state_dict(torch.load("models/plane_detector.pth", map_location=device))
model.to(device)
model.eval()

transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor()
])

def detect_planes(image_path):
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction = model(image_tensor)

    return prediction
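
For reference, torchvision detection models in eval mode return a list with one dict per input image, each holding "boxes" (N×4), "labels" (N,), and "scores" (N,) tensors. A usage sketch with a placeholder path and an arbitrary threshold:

pred = detect_planes("sample.jpg")[0]   # "sample.jpg" is a placeholder path
keep = pred["scores"] > 0.5             # threshold chosen only for this sketch
print(pred["boxes"][keep])
print(pred["labels"][keep], pred["scores"][keep])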
src/train.py
CHANGED
@@ -0,0 +1,112 @@
import torch
import torchvision.models as models
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt
from dataset import PlaneDataset, transform
from torchvision.ops import box_iou
import numpy as np

# Load dataset
dataset = PlaneDataset("Images", "Annotations", transform=transform)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Load pre-trained Faster R-CNN model
model = models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the classifier for detecting planes
num_classes = 2  # 1 for plane + 1 for background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Track statistics
train_losses = []
mAPs = []

# Approximate mAP: mean per-image precision at IoU > 0.5 (not a full COCO-style mAP)
def compute_mAP(model, dataloader, device):
    model.eval()
    iou_threshold = 0.5
    all_precisions = []

    with torch.no_grad():
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            preds = model(images)

            for pred, target in zip(preds, targets):
                pred_boxes = pred["boxes"]
                pred_scores = pred["scores"]
                gt_boxes = target["boxes"]

                if len(pred_boxes) == 0 or len(gt_boxes) == 0:
                    continue

                ious = box_iou(pred_boxes, gt_boxes)
                correct = (ious.max(dim=1).values > iou_threshold).float()
                precision = correct.sum() / max(len(pred_boxes), 1)
                all_precisions.append(precision.item())

    return np.mean(all_precisions) if all_precisions else 0.0

# Training loop with statistics logging
num_epochs = 5
plt.ion()  # Turn on interactive mode for live plotting

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, targets in dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Compute and log statistics
    avg_loss = total_loss / len(dataloader)
    train_losses.append(avg_loss)
    mAP = compute_mAP(model, dataloader, device)
    mAPs.append(mAP)

    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | mAP: {mAP:.4f}")

    # Live Plot Training Progress
    plt.figure(figsize=(10, 5))
    plt.clf()
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label="Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.title("Training Loss")

    plt.subplot(1, 2, 2)
    plt.plot(mAPs, label="mAP")
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.legend()
    plt.title("Mean Average Precision")

    plt.pause(0.1)

# Save model
torch.save(model.state_dict(), "models/plane_detector.pth")
plt.ioff()  # Turn off interactive mode
plt.savefig("plots/training_progress.png")  # Save the final plots before showing them
plt.show()
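
In training mode, torchvision's Faster R-CNN returns a dict of loss components rather than predictions, which is what sum(loss for loss in loss_dict.values()) adds up. A per-component logging sketch that could sit inside the batch loop above:

# loss_dict (from the loop above) typically holds:
#   loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg
for name, value in loss_dict.items():
    print(f"{name}: {value.item():.4f}")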