Spaces:
Sleeping
Sleeping
Update tasks/image.py
Browse files- tasks/image.py +103 -5
tasks/image.py
CHANGED
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from fastapi import APIRouter
|
2 |
from datetime import datetime
|
3 |
from datasets import load_dataset
|
@@ -100,21 +106,113 @@ async def evaluate_image(request: ImageEvaluationRequest):
|
|
100 |
# Update the code below to replace the random baseline with your model inference
|
101 |
#--------------------------------------------------------------------------------------------
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
predictions = []
|
104 |
true_labels = []
|
105 |
pred_boxes = []
|
106 |
true_boxes_list = [] # List of lists, each inner list contains boxes for one image
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
for example in test_dataset:
|
109 |
# Parse true annotation (YOLO format: class_id x_center y_center width height)
|
110 |
annotation = example.get("annotations", "").strip()
|
111 |
has_smoke = len(annotation) > 0
|
112 |
true_labels.append(int(has_smoke))
|
113 |
-
|
114 |
-
# Make random classification prediction
|
115 |
-
pred_has_smoke = random.random() > 0.5
|
116 |
-
predictions.append(int(pred_has_smoke))
|
117 |
-
|
118 |
# If there's a true box, parse it and make random box prediction
|
119 |
if has_smoke:
|
120 |
# Parse all true boxes from the annotation
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.optim as optim
|
4 |
+
from torchvision import transforms
|
5 |
+
from torch.utils.data import DataLoader, Dataset
|
6 |
+
|
7 |
from fastapi import APIRouter
|
8 |
from datetime import datetime
|
9 |
from datasets import load_dataset
|
|
|
106 |
# Update the code below to replace the random baseline with your model inference
|
107 |
#--------------------------------------------------------------------------------------------
|
108 |
|
109 |
+
class ImageClassifier(nn.Module):
    """Small two-stage convolutional classifier for RGB images.

    Architecture: (3x3 conv -> ReLU -> 2x2 max-pool) twice, followed by a
    128-unit fully connected layer and a linear output layer that returns
    raw, unnormalized class scores (logits).

    Args:
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. The default of
            64 reproduces the original hard-coded ``64 * 16 * 16`` feature
            size.
        num_classes: Number of output classes. Defaults to 2 (classes 0, 1).

    Raises:
        ValueError: If the input is too small to survive both pooling stages.
    """

    def __init__(self, input_size=64, num_classes=2):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Each 2x2 / stride-2 pooling halves (and floors) the spatial size;
        # the padded 3x3 convolutions leave it unchanged.
        feat_h, feat_w = height // 4, width // 4
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 poolings"
            )

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * feat_h * feat_w, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)  # Output layer (logits)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        # Flatten everything except the batch dimension; unlike ``view`` this
        # also works when the tensor is not contiguous.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        return self.fc2(x)
|
129 |
+
|
130 |
+
class CustomDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs.

    Wraps an indexable collection of dict-like examples that each carry an
    ``'image'`` entry.

    Args:
        dataset: Indexable collection of examples (supports ``len`` and
            integer indexing).
        transform: Optional callable applied to each image before it is
            returned.
        labels: Optional sequence of labels aligned with ``dataset``. When
            omitted, the label is derived from the example's
            ``"annotations"`` field: 1 if it is non-empty after stripping
            whitespace, otherwise 0.

    Raises:
        ValueError: If ``labels`` is given but its length differs from the
            dataset's.
    """

    def __init__(self, dataset, transform=None, labels=None):
        if labels is not None and len(labels) != len(dataset):
            raise ValueError(
                f"labels has {len(labels)} entries but dataset has "
                f"{len(dataset)}"
            )
        self.dataset = dataset
        self.transform = transform
        self.labels = labels

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        example = self.dataset[idx]
        image = example['image']

        if self.labels is not None:
            label = self.labels[idx]
        else:
            # Same rule the evaluation code uses: a non-empty annotation
            # string marks a positive example.
            annotation = (example.get("annotations", "") or "").strip()
            label = int(len(annotation) > 0)

        if self.transform is not None:
            image = self.transform(image)

        return image, label
|
147 |
+
|
148 |
+
# Create an instance of the model
|
149 |
+
model = ImageClassifier()
|
150 |
+
|
151 |
+
# Define loss function and optimizer
|
152 |
+
criterion = nn.CrossEntropyLoss()
|
153 |
+
optimizer = optim.SGD(model.parameters(), lr=0.1)
|
154 |
+
|
155 |
predictions = []
|
156 |
true_labels = []
|
157 |
pred_boxes = []
|
158 |
true_boxes_list = [] # List of lists, each inner list contains boxes for one image
|
159 |
|
160 |
+
# Data augmentation pipeline.
torch.manual_seed(0)  # Seed torch's global RNG for reproducibility

transform = transforms.Compose([
    # Crop a 512x512 window to reduce the amount of information per image.
    # pad_if_needed avoids an error on images smaller than the crop window.
    transforms.RandomCrop(size=(512, 512), pad_if_needed=True),
    # Resize to a standard size; experiment with different sizes
    transforms.Resize(size=(64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),  # Add random rotations
    # Color variations
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    # Normalize with the ImageNet channel statistics
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
|
173 |
+
|
174 |
+
# Dataset loaders for CNN computation.
# NOTE(review): `train_test` is defined outside the visible code -- confirm it
# is the training split. The datasets are also passed in raw: neither
# `transform` nor `CustomDataset` is applied here -- confirm that is intended.
# Training batches are shuffled so each epoch sees a different batch order.
train_loader = DataLoader(train_test, batch_size=64, shuffle=True)
# The test loader must NOT shuffle: predictions are later compared with
# true labels that are collected by iterating `test_dataset` in order.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
|
177 |
+
|
178 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
179 |
+
model.to(device)
|
180 |
+
|
181 |
+
|
182 |
+
# Training loop
num_epochs = 10
model.train()  # Explicitly select training mode before optimizing
for epoch in range(num_epochs):
    loss = None  # Stays None if the loader yields no batches
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Report the loss of the last batch of the epoch; the total comes from
    # num_epochs so the message stays correct if that value changes.
    if loss is not None:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
|
196 |
+
|
197 |
+
# Evaluation loop
model.eval()  # Set the model to evaluation mode

with torch.no_grad():  # No gradients are needed for inference
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Take the argmax of the raw logits. Passing them through a sigmoid
        # first cannot change the ranking but can saturate large logits to
        # exactly 1.0, creating ties that resolve to the wrong class.
        prediction = outputs.argmax(dim=1)
        # Store plain Python ints rather than numpy scalars.
        predictions.extend(prediction.cpu().tolist())
|
209 |
+
|
210 |
for example in test_dataset:
|
211 |
# Parse true annotation (YOLO format: class_id x_center y_center width height)
|
212 |
annotation = example.get("annotations", "").strip()
|
213 |
has_smoke = len(annotation) > 0
|
214 |
true_labels.append(int(has_smoke))
|
215 |
+
|
|
|
|
|
|
|
|
|
216 |
# If there's a true box, parse it and make random box prediction
|
217 |
if has_smoke:
|
218 |
# Parse all true boxes from the annotation
|