JAL1611 NN & DL LABORATORY
Ex. No: 10
IMPLEMENTATION OF OBJECT DETECTION USING CNN
PROGRAM:
import os
import torch
from torch import optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision.datasets import VOCDetection
from torchvision import transforms, models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import random
from PIL import Image
import multiprocessing
from torchmetrics.detection.mean_ap import MeanAveragePrecision
BATCH_SIZE = 2
NUM_EPOCHS = 5
NUM_SAMPLES = 200         # train on a small subset of VOC 2007 for speed
VAL_RATIO = 0.2           # fraction of the subset held out for validation
TARGET_SIZE = (224, 224)  # all images are resized to this (width, height)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "fasterrcnn_model.pth"
VOC_CLASSES = [
'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]
label2idx = {cls: idx + 1 for idx, cls in enumerate(VOC_CLASSES)}  # index 0 is reserved for background
class VOCDataset(VOCDetection):
    def __init__(self, root, year, image_set, transforms=None, augment=False):
        super().__init__(root=root, year=year, image_set=image_set, download=True)
        self.transforms = transforms
        self.augment = augment

    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        ann = target['annotation']
        objs = ann['object']
        if isinstance(objs, dict):  # a single object is parsed as a dict, not a list
            objs = [objs]
        orig_w, orig_h = img.size
        img = img.resize(TARGET_SIZE, Image.BILINEAR)
        # Data augmentation: random horizontal flip
        if self.augment and random.random() > 0.5:
            img = F.hflip(img)
            flipped = True
        else:
            flipped = False
        img_tensor = transforms.ToTensor()(img)
        img_tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])(img_tensor)
        # Rescale box coordinates from the original image size to TARGET_SIZE
        scale_x = TARGET_SIZE[0] / orig_w
        scale_y = TARGET_SIZE[1] / orig_h
        boxes, labels = [], []
        for obj in objs:
            bbox = obj['bndbox']
            x1 = float(bbox['xmin']) * scale_x
            y1 = float(bbox['ymin']) * scale_y
            x2 = float(bbox['xmax']) * scale_x
            y2 = float(bbox['ymax']) * scale_y
            if flipped:  # mirror the box horizontally to match the flipped image
                x1, x2 = TARGET_SIZE[0] - x2, TARGET_SIZE[0] - x1
            boxes.append(torch.tensor([x1, y1, x2, y2], dtype=torch.float32))
            labels.append(label2idx.get(obj['name'], 0))
        return img_tensor, {
            'boxes': torch.stack(boxes) if boxes else torch.zeros((0, 4), dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)
        }
def collate_fn(batch):
    # Keep images and targets as lists instead of stacking into one tensor,
    # since detection targets have a variable number of boxes per image
    return tuple(zip(*batch))
def evaluate_with_map(model, data_loader):
    model.eval()
    metric = MeanAveragePrecision()
    with torch.no_grad():
        for images, targets in data_loader:
            images = [img.to(DEVICE) for img in images]
            outputs = model(images)
            for i in range(len(images)):
                pred = {
                    "boxes": outputs[i]["boxes"].cpu(),
                    "scores": outputs[i]["scores"].cpu(),
                    "labels": outputs[i]["labels"].cpu()
                }
                tgt = {
                    "boxes": targets[i]["boxes"].cpu(),
                    "labels": targets[i]["labels"].cpu()
                }
                metric.update([pred], [tgt])
    results = metric.compute()
    print("\nEvaluation Metrics:")
    print(f"  mAP @ IoU=0.5:      {results['map_50']:.4f}")
    print(f"  mAP @ IoU=0.5:0.95: {results['map']:.4f}")
def get_model():
    # Start from a COCO-pretrained Faster R-CNN and replace its box predictor
    model = models.detection.fasterrcnn_resnet50_fpn(
        weights=models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    )
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # +1 output class for the background
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(VOC_CLASSES) + 1)
    return model
def visualize_predictions(model, dataset, num_images=5):
    model.eval()
    indices = random.sample(range(len(dataset)), num_images)
    for idx in indices:
        img, target = dataset[idx]
        img = img.to(DEVICE)
        with torch.no_grad():
            pred = model([img])[0]
        # Undo the ImageNet normalisation so the image displays correctly
        img_np = img.permute(1, 2, 0).cpu().numpy()
        img_np = np.clip((img_np * [0.229, 0.224, 0.225]) + [0.485, 0.456, 0.406], 0, 1)
        fig, ax = plt.subplots(1, figsize=(8, 8))
        ax.imshow(img_np)
        # Predictions in red (confidence threshold 0.3)
        for box, score, lbl in zip(pred['boxes'], pred['scores'], pred['labels']):
            if score < 0.3:
                continue
            x1, y1, x2, y2 = box
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                     linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            cls_name = VOC_CLASSES[lbl - 1] if lbl > 0 else 'bg'
            ax.text(x1, y1, f"{cls_name}:{score:.2f}", fontsize=8,
                    bbox=dict(facecolor='red', pad=0.3, alpha=0.5))
        # Ground-truth boxes in green
        for gt_box in target['boxes']:
            x1, y1, x2, y2 = gt_box
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                     linewidth=2, edgecolor='g', facecolor='none')
            ax.add_patch(rect)
        ax.axis('off')
        ax.set_title("Predicted (red) | Ground Truth (green)")
        plt.tight_layout()
        plt.show()
if __name__ == '__main__':
    multiprocessing.freeze_support()
    full_dataset = VOCDataset(root='./data', year='2007', image_set='train', augment=True)
    random.seed(42)
    indices = random.sample(range(len(full_dataset)), NUM_SAMPLES)
    subset = Subset(full_dataset, indices)
    n_val = int(VAL_RATIO * NUM_SAMPLES)
    train_subset, val_subset = random_split(subset, [NUM_SAMPLES - n_val, n_val])
    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=0, collate_fn=collate_fn)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False,
                            num_workers=0, collate_fn=collate_fn)
    model = get_model().to(DEVICE)
    if os.path.exists(MODEL_PATH):
        print(f"Loading saved model from {MODEL_PATH}...")
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
        model.eval()
    else:
        print("Training new model...")
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        for epoch in range(NUM_EPOCHS):
            model.train()  # restore training mode (validation switches to eval)
            loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}")
            for images, targets in loop:
                images = [img.to(DEVICE) for img in images]
                targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
                loss_dict = model(images, targets)  # training mode returns a dict of losses
                losses = sum(loss for loss in loss_dict.values())
                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                loop.set_postfix(loss=losses.item())
            evaluate_with_map(model, val_loader)
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"Model saved to {MODEL_PATH}")
    visualize_predictions(model, full_dataset, num_images=10)
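NOTE:
The transcript below reports a per-epoch "Validation Mean IoU" rather than the mAP printed by evaluate_with_map. A simple mean-IoU check of that kind can be written with torchvision.ops.box_iou. The helper below is a minimal sketch, not part of the graded program, assuming the same DEVICE and data-loader conventions as above: it matches each ground-truth box to its best-overlapping predicted box and averages the IoU over the validation set.

import torch
from torchvision.ops import box_iou

def validation_mean_iou(model, data_loader, device):
    # For every ground-truth box, take the best IoU among that image's
    # predicted boxes, then average over the whole validation set.
    model.eval()
    ious = []
    with torch.no_grad():
        for images, targets in data_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)
            for out, tgt in zip(outputs, targets):
                gt = tgt["boxes"].to(device)
                pred = out["boxes"]
                if len(gt) == 0 or len(pred) == 0:
                    continue  # nothing to match in this image
                # box_iou returns a |gt| x |pred| matrix of pairwise IoUs
                best_iou_per_gt = box_iou(gt, pred).max(dim=1).values
                ious.append(best_iou_per_gt)
    return torch.cat(ious).mean().item() if ious else 0.0

Calling print(f"Validation Mean IoU: {validation_mean_iou(model, val_loader, DEVICE):.3f}") at the end of each epoch would produce lines like those in the output below.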
OUTPUT:
Training new model...
Epoch 1/5: 100%|██████████████████| … loss=1.234
Validation Mean IoU: 0.123
Epoch 2/5: 100%|██████████████████| … loss=0.987
Validation Mean IoU: 0.156
Epoch 5/5: 100%|██████████████████| … loss=0.543
Validation Mean IoU: 0.201
Model saved to fasterrcnn_model.pth
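
USING THE SAVED MODEL:
Once the run above has produced fasterrcnn_model.pth, the trained detector can be reused outside this script by rebuilding the same architecture and loading the checkpoint. The following is a minimal standalone sketch, assuming the checkpoint path from the program; the image path "test.jpg" is a hypothetical example.

import torch
from PIL import Image
from torchvision import models, transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

VOC_CLASSES = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the same architecture as in the program, then load the saved weights
model = models.detection.fasterrcnn_resnet50_fpn(weights=None)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(VOC_CLASSES) + 1)
model.load_state_dict(torch.load("fasterrcnn_model.pth", map_location=device))
model.to(device).eval()

# Preprocess one image the same way the training pipeline does
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = preprocess(Image.open("test.jpg").convert("RGB")).to(device)

with torch.no_grad():
    pred = model([img])[0]  # detection models take a list of 3D tensors

# Report detections above a confidence threshold
for box, score, label in zip(pred["boxes"], pred["scores"], pred["labels"]):
    if score >= 0.5:
        name = VOC_CLASSES[label - 1] if label > 0 else "background"
        print(f"{name}: {score:.2f} at {box.tolist()}")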