# File size: 5,760 Bytes
# e2a8ed6
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import cv2
import os
import numpy as np
import time
import random
class CNN(nn.Module):
    """Six-stage convolutional image classifier.

    Every stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2)``, with channel widths 3 -> 16 -> 32 -> 64 -> 128 ->
    256 -> 512. The head flattens the feature map and expects
    ``512 * 3 * 3`` features, which is what six 2x poolings leave from a
    224x224 input (224 -> 112 -> 56 -> 28 -> 14 -> 7 -> 3).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Stages are appended in order so the submodule indices -- and hence
        # state_dict keys such as ``features.0.weight`` -- stay sequential.
        widths = (3, 16, 32, 64, 128, 256, 512)
        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2, 2))
        self.features = nn.Sequential(*stages)

        head = [
            nn.Flatten(),
            nn.Linear(512 * 3 * 3, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images ``x``."""
        return self.classifier(self.features(x))
class WebcamPredictor:
    """Run a trained ``CNN`` checkpoint on live webcam frames.

    The checkpoint is read as a dict with the keys ``'class_names'`` (the
    labels, indexed by class id) and ``'model_state_dict'``.
    """

    # Normalisation statistics and input size used by both ``self.transform``
    # and the NumPy preprocessing in ``predict_frame``.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)
    _INPUT_SIZE = (224, 224)

    def __init__(self, model_path):
        """Load the checkpoint and put the model in inference mode.

        Args:
            model_path: Path of a checkpoint readable by ``torch.load``.
        """
        # Prefer the GPU when one is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        checkpoint = torch.load(model_path, map_location=self.device, weights_only=True)
        self.class_names = checkpoint['class_names']

        self.model = CNN(num_classes=len(self.class_names))
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.to(self.device)
        # eval() freezes BatchNorm statistics and disables Dropout.
        self.model.eval()

        # Not used by predict_frame (which preprocesses with OpenCV/NumPy);
        # kept as a public attribute for callers working with PIL images.
        self.transform = transforms.Compose([
            transforms.Resize(self._INPUT_SIZE),
            transforms.ToTensor(),
            transforms.Normalize(mean=list(self._MEAN), std=list(self._STD)),
        ])

    def predict_frame(self, frame):
        """Classify a single frame.

        Args:
            frame: Image array in OpenCV's BGR channel order.

        Returns:
            ``(class_name, confidence)``, where ``confidence`` is the softmax
            probability of the predicted class, or ``(None, None)`` if any
            step raises.
        """
        try:
            # Resize while still in BGR so the debug preview is shown with
            # correct colours (imshow expects BGR). Resizing works per
            # channel, so doing it before the colour swap yields the same
            # pixels as swapping first.
            resized = cv2.resize(frame, self._INPUT_SIZE)
            cv2.imshow("prediction", resized)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            mean = np.array(self._MEAN, dtype=np.float32)
            std = np.array(self._STD, dtype=np.float32)
            normalized = (rgb.astype(np.float32) / 255.0 - mean) / std

            # HWC -> CHW, then add the batch dimension.
            chw = np.transpose(normalized, (2, 0, 1))
            batch = torch.from_numpy(chw).unsqueeze(0).float().to(self.device)

            with torch.no_grad():
                logits = self.model(batch)
                probabilities = torch.nn.functional.softmax(logits, dim=1)
                predicted_idx = torch.argmax(probabilities, dim=1).item()
                confidence = probabilities[0][predicted_idx].item()

            return self.class_names[predicted_idx], confidence
        except Exception as e:
            # Deliberately best-effort: one bad frame must not stop the
            # capture loop, so report the problem and let the caller skip it.
            print(f"Error during prediction: {str(e)}")
            return None, None

    @staticmethod
    def _draw_overlay(frame, predicted_class, confidence, fps):
        """Return ``frame`` with a translucent panel showing the prediction and FPS."""
        overlay = frame.copy()
        # The panel reaches y=110 so the FPS line (baseline y=100) sits
        # inside it instead of spilling below the background.
        cv2.rectangle(overlay, (10, 10), (400, 110), (0, 0, 0), -1)
        alpha = 0.6
        frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

        font = cv2.FONT_HERSHEY_SIMPLEX
        lines = (
            (f"Class: {predicted_class}", 40),
            (f"Confidence: {confidence:.2%}", 70),
            (f"FPS: {fps:.1f}", 100),
        )
        for text, baseline_y in lines:
            cv2.putText(frame, text, (20, baseline_y), font, 0.8, (0, 255, 0), 2)
        return frame

    def start_webcam(self):
        """Show annotated webcam frames until 'q' is pressed or a read fails.

        Opens capture device 0, requests a 1280x720 frame size and, for every
        frame, overlays the predicted class, its confidence and the measured
        FPS. The camera and all OpenCV windows are released on the way out,
        including when the loop is interrupted by an exception.
        """
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("Error: Could not open webcam")
            return

        try:
            # Requested size only; the driver may pick a different one.
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
            print("Webcam started. Press 'q' to quit.")

            # FPS is averaged over windows of 30 frames.
            window_start = time.perf_counter()
            window_frames = 0
            fps = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    print("Error: Could not read frame")
                    break

                window_frames += 1
                if window_frames >= 30:
                    now = time.perf_counter()
                    elapsed = now - window_start
                    if elapsed > 0:
                        fps = window_frames / elapsed
                    window_start = now
                    window_frames = 0

                predicted_class, confidence = self.predict_frame(frame)
                if predicted_class is not None:
                    frame = self._draw_overlay(frame, predicted_class, confidence, fps)

                cv2.imshow('Webcam Prediction', frame)

                # waitKey also services the GUI event loop; 30 ms per frame.
                if cv2.waitKey(30) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the device and close windows, even on Ctrl+C or an
            # unexpected error inside the loop.
            cap.release()
            cv2.destroyAllWindows()
def main(model_path="saved_models/best_model.pth"):
    """Load a checkpoint and run the live webcam demo.

    Args:
        model_path: Checkpoint file handed to ``WebcamPredictor``. The
            default is the path this script has always used, so calling
            ``main()`` with no arguments behaves as before.
    """
    predictor = WebcamPredictor(model_path)
    predictor.start_webcam()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()