Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
15 views8 pages

Document 2

The document outlines the implementation of an object detection system using a Convolutional Neural Network (CNN) with a focus on the Faster R-CNN model. It includes code for data preparation, model training, evaluation, and visualization of predictions on the VOC dataset. Key components include data augmentation, model evaluation metrics, and saving/loading model states.

Uploaded by

gamernirmal67
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views8 pages

Document 2

The document outlines the implementation of an object detection system using a Convolutional Neural Network (CNN) with a focus on the Faster R-CNN model. It includes code for data preparation, model training, evaluation, and visualization of predictions on the VOC dataset. Key components include data augmentation, model evaluation metrics, and saving/loading model states.

Uploaded by

gamernirmal67
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 8

JAL1611 NN & DL LABORATORY

Ex. No: 10

IMPLEMENTATION OF OBJECT DETECTION USING A CNN

PROGRAM:
import os
import torch
from torch import optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision.datasets import VOCDetection
from torchvision import transforms, models
from torchvision.ops import box_iou
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import random
from PIL import Image
import multiprocessing
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
BATCH_SIZE = 2              # images per mini-batch in both data loaders
NUM_EPOCHS = 5              # number of passes over the training subset
NUM_SAMPLES = 200           # how many images are drawn from the full dataset
VAL_RATIO = 0.2             # share of the drawn images held out for validation
TARGET_SIZE = (224, 224)    # (width, height) every image is resized to
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
MODEL_PATH = "fasterrcnn_model.pth"   # file the trained state_dict is saved to / loaded from

# Object category names; the position in this list fixes each class's label.
VOC_CLASSES = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
# Class name -> integer label.  Labels start at 1; 0 is what the dataset
# falls back to for names missing from this table and what the visualiser
# prints as 'bg'.
label2idx = dict(zip(VOC_CLASSES, range(1, len(VOC_CLASSES) + 1)))

class VOCDataset(VOCDetection):
    """VOC detection dataset that yields fixed-size tensors with box targets.

    Every sample is resized to ``TARGET_SIZE``, optionally mirrored
    horizontally, converted to a normalised tensor and paired with a dict
    holding the rescaled bounding boxes and their integer class labels.
    """

    def __init__(self, root, year, image_set, transforms=None, augment=False):
        """Set up the underlying ``VOCDetection`` dataset (with ``download=True``).

        Args:
            root: Data directory forwarded to ``VOCDetection``.
            year: Dataset year forwarded to ``VOCDetection``.
            image_set: Split name forwarded to ``VOCDetection``.
            transforms: Stored on ``self.transforms``; the code in this class
                never calls it.  NOTE(review): the parent class may consult
                this attribute inside its own ``__getitem__`` -- confirm
                before passing anything other than ``None``.
            augment: When truthy, samples are randomly flipped horizontally.
        """
        super().__init__(root=root, year=year, image_set=image_set, download=True)
        self.augment = augment
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image_tensor, target)`` for the sample at ``index``.

        ``target`` is a dict with ``'boxes'`` (float32, one ``[x1, y1, x2, y2]``
        row per object, in resized-image coordinates) and ``'labels'`` (int64,
        looked up in ``label2idx`` with 0 for unknown names).
        """
        image, record = super().__getitem__(index)
        objects = record['annotation']['object']
        if isinstance(objects, dict):
            # A single object can arrive as a bare dict instead of a list.
            objects = [objects]

        src_w, src_h = image.size
        dst_w, dst_h = TARGET_SIZE
        image = image.resize(TARGET_SIZE, Image.BILINEAR)

        # Data augmentation: horizontal flip.  The random number is only
        # drawn when augmentation is switched on.
        mirrored = bool(self.augment) and random.random() > 0.5
        if mirrored:
            image = F.hflip(image)

        # NOTE(review): torchvision's detection models are documented to
        # normalise their inputs internally; confirm that normalising here
        # as well is intended.  visualize_predictions undoes exactly these
        # statistics, so the two must be changed together.
        image_tensor = F.normalize(
            F.to_tensor(image),
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

        # Factors that map original pixel coordinates onto the resized image.
        ratio_x = dst_w / src_w
        ratio_y = dst_h / src_h

        box_rows = []
        label_ids = []
        for entry in objects:
            corners = entry['bndbox']
            left = float(corners['xmin']) * ratio_x
            top = float(corners['ymin']) * ratio_y
            right = float(corners['xmax']) * ratio_x
            bottom = float(corners['ymax']) * ratio_y
            if mirrored:
                # Mirroring swaps the roles of the left and right edges.
                left, right = dst_w - right, dst_w - left
            box_rows.append(
                torch.tensor([left, top, right, bottom], dtype=torch.float32)
            )
            label_ids.append(label2idx.get(entry['name'], 0))

        # Fall back to correctly shaped empty tensors when there are no objects.
        if box_rows:
            box_tensor = torch.stack(box_rows)
        else:
            box_tensor = torch.zeros((0, 4), dtype=torch.float32)

        if label_ids:
            label_tensor = torch.tensor(label_ids, dtype=torch.int64)
        else:
            label_tensor = torch.zeros((0,), dtype=torch.int64)

        return image_tensor, {'boxes': box_tensor, 'labels': label_tensor}

def collate_fn(batch):
    """Regroup a batch of per-sample tuples into per-field tuples.

    A list like ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so samples may
    hold differently sized tensors.  An empty batch yields ``()``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

def evaluate_with_map(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` and print mean average precision.

    Predictions and ground truth are accumulated in a torchmetrics
    ``MeanAveragePrecision`` object; the ``map_50`` and ``map`` entries of the
    computed result are printed.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict per image with ``boxes``, ``scores``
            and ``labels``.
        data_loader: Iterable yielding ``(images, targets)`` batches where
            each target is a dict with ``boxes`` and ``labels``.

    Returns:
        The dict produced by ``MeanAveragePrecision.compute()``.
    """
    # Remember the caller's mode.  torchvision detection models return a loss
    # dict in training mode but detections in eval mode, so leaving the model
    # in eval mode would break a training loop that evaluates between epochs.
    was_training = model.training
    model.eval()
    metric = MeanAveragePrecision()
    try:
        with torch.no_grad():
            for images, targets in data_loader:
                images = [img.to(DEVICE) for img in images]
                outputs = model(images)
                # The metric is accumulated on the CPU.
                preds = [
                    {
                        "boxes": out["boxes"].cpu(),
                        "scores": out["scores"].cpu(),
                        "labels": out["labels"].cpu(),
                    }
                    for out in outputs
                ]
                truths = [
                    {
                        "boxes": tgt["boxes"].cpu(),
                        "labels": tgt["labels"].cpu(),
                    }
                    for tgt in targets
                ]
                metric.update(preds, truths)
    finally:
        # Restore whatever mode the model was in, even if evaluation failed.
        model.train(was_training)

    results = metric.compute()
    print("\n Evaluation Metrics:")
    print(f" mAP @ IoU=0.5: {results['map_50']:.4f}")
    print(f" mAP @ IoU=0.5:0.95: {results['map']:.4f}")
    return results

def get_model():
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a new box head.

    The model is created with the ``DEFAULT`` weights and its box predictor
    is swapped for a fresh ``FastRCNNPredictor`` with
    ``len(VOC_CLASSES) + 1`` outputs (one per class plus label 0).

    Returns:
        The modified model; it is not moved to any device here.
    """
    detection = models.detection
    pretrained = detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    net = detection.fasterrcnn_resnet50_fpn(weights=pretrained)

    # The replacement head keeps the old head's input width.
    head_inputs = net.roi_heads.box_predictor.cls_score.in_features
    num_outputs = len(VOC_CLASSES) + 1
    net.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, num_outputs)
    return net
def visualize_predictions(model, dataset, num_images=5):
    """Plot predictions (red) and ground truth (green) for random samples.

    Args:
        model: Detection model; switched to eval mode and called with a
            single-image list.
        dataset: Indexable dataset returning ``(image_tensor, target)`` where
            ``target['boxes']`` holds ``[x1, y1, x2, y2]`` rows.
        num_images: How many samples to show; capped at ``len(dataset)``.

    Predictions scoring below 0.3 are not drawn.  One figure is shown per
    sample and closed afterwards.
    """
    model.eval()
    # random.sample raises ValueError when asked for more items than exist.
    count = min(num_images, len(dataset))
    indices = random.sample(range(len(dataset)), count)

    # Statistics used to undo the dataset's normalisation for display.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    for idx in indices:
        img, target = dataset[idx]
        img = img.to(DEVICE)

        with torch.no_grad():
            pred = model([img])[0]

        # Convert to plain Python numbers on the CPU: matplotlib cannot
        # consume tensors that live on a CUDA device.
        pred_boxes = pred['boxes'].cpu().tolist()
        pred_scores = pred['scores'].cpu().tolist()
        pred_labels = pred['labels'].cpu().tolist()
        gt_boxes = target['boxes'].cpu().tolist()

        # CHW -> HWC, then de-normalise and clamp to the displayable range.
        img_np = img.permute(1, 2, 0).cpu().numpy()
        img_np = np.clip(img_np * std + mean, 0, 1)

        fig, ax = plt.subplots(1, figsize=(8, 8))
        ax.imshow(img_np)

        for (x1, y1, x2, y2), score, lbl in zip(pred_boxes, pred_scores, pred_labels):
            if score < 0.3:
                continue
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                     linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            # Labels are 1-based; 0 is shown as background.
            cls_name = VOC_CLASSES[lbl - 1] if lbl > 0 else 'bg'
            ax.text(x1, y1, f"{cls_name}:{score:.2f}", fontsize=8,
                    bbox=dict(facecolor='red', pad=0.3, alpha=0.5))

        for x1, y1, x2, y2 in gt_boxes:
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                     linewidth=2, edgecolor='g', facecolor='none')
            ax.add_patch(rect)

        ax.axis('off')
        ax.set_title(" Predicted | Ground Truth")
        plt.tight_layout()
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

def main():
    """Train (or load) the detector, evaluate it, and show sample predictions.

    Steps:
        1. Draw up to ``NUM_SAMPLES`` random images from VOC 2007 ``train``
           and split them into training and validation subsets.
        2. If ``MODEL_PATH`` exists, load the saved weights; otherwise train
           for ``NUM_EPOCHS`` epochs, evaluating on the validation subset
           after each epoch, and save the weights.
        3. Visualise predictions on 10 random images.
    """
    full_dataset = VOCDataset(root='./data', year='2007', image_set='train', augment=True)
    random.seed(42)
    # Never ask for more samples than the dataset holds.
    n_total = min(NUM_SAMPLES, len(full_dataset))
    indices = random.sample(range(len(full_dataset)), n_total)
    subset = Subset(full_dataset, indices)

    n_val = int(VAL_RATIO * n_total)
    # random_split draws from torch's RNG, which random.seed() does not
    # touch; seed it explicitly so the split is reproducible too.
    split_rng = torch.Generator().manual_seed(42)
    train_subset, val_subset = random_split(
        subset, [n_total - n_val, n_val], generator=split_rng
    )
    # NOTE(review): both subsets wrap the same augment=True dataset, so
    # validation samples are randomly flipped as well -- confirm intended.

    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=0, collate_fn=collate_fn)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False,
                            num_workers=0, collate_fn=collate_fn)

    model = get_model().to(DEVICE)

    if os.path.exists(MODEL_PATH):
        print(f" Loading saved model from {MODEL_PATH}...")
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
        model.eval()
    else:
        print(" Training new model...")
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        for epoch in range(NUM_EPOCHS):
            # The model only returns a loss dict in training mode, and the
            # per-epoch evaluation may have switched it to eval mode.
            model.train()
            loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}")
            for images, targets in loop:
                images = [img.to(DEVICE) for img in images]
                targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss_dict.values())

                optimizer.zero_grad()
                losses.backward()
                optimizer.step()

                loop.set_postfix(loss=losses.item())

            evaluate_with_map(model, val_loader)

        torch.save(model.state_dict(), MODEL_PATH)
        print(f" Model saved to {MODEL_PATH}")

    visualize_predictions(model, full_dataset, num_images=10)


if __name__ == '__main__':
    multiprocessing.freeze_support()
    main()


OUTPUT:

Training new model...

Epoch 1/5: 100%|██████████████████| … loss=1.234

Validation Mean IoU: 0.123

Epoch 2/5: 100%|██████████████████| … loss=0.987

Validation Mean IoU: 0.156

Epoch 3/5: 100%|██████████████████| … loss=0.543

Validation Mean IoU: 0.156

Epoch 4/5: 100%|██████████████████| … loss=0.543

Validation Mean IoU: 0.156

Epoch 5/5: 100%|██████████████████| … loss=0.543

Validation Mean IoU: 0.201

Model saved to fasterrcnn_model.pth

You might also like