
Question 7 - ResNet

In [1]:
import os
import glob
import torch
import numpy as np
from PIL import Image

from tqdm import tqdm
from collections import Counter
from xml.etree import ElementTree as ET

from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader

Load the dataset

In [2]:
val_labels = "./data/MaskedFace/val/labels"
val_imgs = "./data/MaskedFace/val/images"

# Sort both lists so image and label paths stay aligned when zipped later
y_true = sorted(glob.glob(os.path.join(val_labels, "*.txt")))
images = sorted(glob.glob(os.path.join(val_imgs, "*.png")))
In [3]:
test_dataset = {
    'images': images,  # list of image paths
    'y_true': y_true,  # list of label paths
}
In [4]:
def count_obj(txt_file, n_class):
    with open(txt_file, 'r') as file:
        lines = file.readlines()
    # Extracting the class identifiers from each line
    class_ids = [int(line.split()[0]) for line in lines]

    # Counting the occurrences of each class
    class_counts = Counter(class_ids)

    # Build a list of counts ordered by class id; Counter returns 0 for missing keys
    sorted_counts = [class_counts[i] for i in range(n_class)]
    return sorted_counts
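The label files are assumed to follow a YOLO-style layout (class id first, then box coordinates); only the first token is used here. A minimal sketch with hypothetical file contents showing how count_obj aggregates them:

# Hypothetical label file: two objects of class 0 and one of class 1 (coordinates are made up)
example_lines = "0 0.51 0.42 0.10 0.12\n0 0.23 0.60 0.08 0.09\n1 0.75 0.30 0.11 0.14\n"
with open("example_label.txt", "w") as f:
    f.write(example_lines)
print(count_obj("example_label.txt", 3))  # expected output: [2, 1, 0]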
In [5]:
gt_counts = []
for img, txt in tqdm(zip(test_dataset['images'], test_dataset['y_true'])):
    # Ground-truth per-class object counts for this image
    obj_count = count_obj(txt, 3)
    gt_counts.append(obj_count)
85it [00:00, 96.70it/s] 

Load the model

In [6]:
class ImageDataset(Dataset):
    def __init__(self, directory, transformations=None):
        self.directory = directory
        self.transformations = transformations
        self.filenames = [file for file in os.listdir(directory) if file.endswith('.png')]
        self.labels_array = np.zeros((len(self.filenames), 3), dtype=np.int64)

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        file_path = os.path.join(self.directory, self.filenames[index])
        img = Image.open(file_path).convert('RGB')
        labels = self.extract_labels(file_path.replace('.png', '.xml'))
        
        if self.transformations:
            img = self.transformations(img)
        
        self.labels_array[index] = labels
        return img, torch.tensor(labels, dtype=torch.float32)

    def extract_labels(self, xml_path):
        xml_data = ET.parse(xml_path)
        categories = {'with_mask': 0, 'without_mask': 0, 'mask_weared_incorrect': 0}
        for item in xml_data.getroot().findall('object'):
            categories[item.find('name').text] += 1
        return list(categories.values())

# Define image transformations
image_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Initialize the pretrained ResNet18 model and modify the fully connected layer
pretrained_model = models.resnet18(pretrained=True)
pretrained_model.fc = torch.nn.Linear(pretrained_model.fc.in_features, 3)

# Create the dataset and dataloaders
training_data = ImageDataset('data/MaskedFace/train', transformations=image_transforms['train'])
validation_data = ImageDataset('data/MaskedFace/val', transformations=image_transforms['val'])

train_data_loader = DataLoader(training_data, batch_size=32, shuffle=True)
validation_data_loader = DataLoader(validation_data, batch_size=32)

# Move the model to the available device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pretrained_model.to(device)
D:\Anaconda3\envs\what\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
D:\Anaconda3\envs\what\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
Out[6]:
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=3, bias=True)
)
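As a quick sanity check (a minimal sketch, not part of the original run): the replaced fc layer maps the 512-dimensional pooled feature vector to three per-class count predictions, so a single 224x224 input should produce a (1, 3) output.

# Sketch: confirm the modified head outputs one value per mask class
pretrained_model.eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 224, 224).to(device)  # one dummy normalised RGB image
    print(pretrained_model(dummy).shape)            # expected: torch.Size([1, 3])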
In [7]:
import copy
from sklearn.metrics import mean_absolute_error

# Setup device, loss function, optimizer, and learning rate scheduler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pretrained_model.to(device)

loss_function = torch.nn.MSELoss()
optimizer = torch.optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)
learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Evaluate the model on a loader: for each batch, sum the class-wise MAEs
# (multioutput='raw_values' returns one MAE per class), then normalise by the dataset size
def evaluate_performance(model, loader):
    model.eval()
    total_error = 0.0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        with torch.no_grad():
            predictions = model(imgs)
        error = mean_absolute_error(lbls.cpu().numpy(), predictions.cpu().numpy(), multioutput='raw_values')
        total_error += np.sum(error)
    return total_error / len(loader.dataset)

# Early stopping and model saving setup
best_model_wts = copy.deepcopy(pretrained_model.state_dict())
best_loss = float('inf')
early_stopping_patience = 3
patience_counter = 0

# Training loop
epochs = 10
for epoch in range(epochs):
    pretrained_model.train()
    epoch_loss = 0.0
    for imgs, lbls in tqdm(train_data_loader):
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        predictions = pretrained_model(imgs)
        loss = loss_function(predictions, lbls)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    learning_rate_scheduler.step()
    
    # Validation phase
    validation_loss = evaluate_performance(pretrained_model, validation_data_loader)
    print(f'Epoch {epoch+1}, Loss: {epoch_loss / len(train_data_loader):.3f}, Validation Loss: {validation_loss:.3f}')
    
    # Checkpoint on improvement; otherwise count towards early stopping
    if validation_loss < best_loss:
        best_loss = validation_loss
        best_model_wts = copy.deepcopy(pretrained_model.state_dict())
        torch.save(pretrained_model.state_dict(), 'best_model.pth')
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print('Early stopping triggered')
            break

# Load the best model weights
pretrained_model.load_state_dict(torch.load('best_model.pth'))

# Final evaluation on the validation dataset
validation_error = evaluate_performance(pretrained_model, validation_data_loader)
print(f'Validation MAE: {validation_error * 100:.2f}%')

# Per-image object counts recorded for the training set (filled lazily in __getitem__)
print(training_data.labels_array)
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:28<00:00,  6.76s/it]
Epoch 1, Loss: 13.687, Validation Loss: 0.191
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:12<00:00,  6.02s/it]
Epoch 2, Loss: 10.426, Validation Loss: 0.219
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:18<00:00,  6.28s/it]
Epoch 3, Loss: 11.348, Validation Loss: 0.227
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:17<00:00,  6.26s/it]
Epoch 4, Loss: 9.872, Validation Loss: 0.163
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:08<00:00,  5.85s/it]
Epoch 5, Loss: 8.712, Validation Loss: 0.190
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:08<00:00,  5.84s/it]
Epoch 6, Loss: 10.092, Validation Loss: 0.150
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:10<00:00,  5.94s/it]
Epoch 7, Loss: 9.503, Validation Loss: 0.321
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:09<00:00,  5.88s/it]
Epoch 8, Loss: 6.198, Validation Loss: 0.123
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:09<00:00,  5.87s/it]
Epoch 9, Loss: 5.333, Validation Loss: 0.128
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:36<00:00,  7.11s/it]
Epoch 10, Loss: 4.443, Validation Loss: 0.125
Validation MAE: 12.31%
[[1 2 0]
 [8 1 0]
 [3 0 1]
 ...
 [3 0 0]
 [1 0 0]
 [1 1 0]]

Evaluate on the test set

In [8]:
# Variant of evaluate_performance: prints the class-wise MAE of each batch and
# averages the summed class-wise MAEs over the batches (3 batches of 85 validation images)
def evaluate_performance(model, loader):
    model.eval()
    total_error = 0.0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        with torch.no_grad():
            predictions = model(imgs)
        error = mean_absolute_error(lbls.cpu().numpy(), predictions.cpu().numpy(), multioutput='raw_values')
        print(error)
        total_error += np.sum(error)
    return total_error / len(loader)  # len(loader) == 3 here, matching the original hard-coded divisor
In [9]:
# Load the best model weights
pretrained_model.load_state_dict(torch.load('best_model.pth'))

# Final evaluation on the validation dataset
validation_error = evaluate_performance(pretrained_model, validation_data_loader)
print(f'Validation MAE: {validation_error * 100:.2f}%')
[1.5965363  1.3042079  0.25560504]
[1.8177493  1.5732876  0.45420742]
[1.9562395  1.3338923  0.17067692]
Validation MAE: 348.75%
In [11]:
counts = []
pretrained_model.eval()
for img, lbls in tqdm(validation_data):
    img, lbls = img.to(device), lbls.to(device)
    with torch.no_grad():
        predictions = pretrained_model(torch.unsqueeze(img, 0))[0]
    counts.append(predictions.cpu().numpy())
100%|██████████████████████████████████████████████████████████████████████████████████| 85/85 [00:11<00:00,  7.45it/s]
In [17]:
counts
Out[17]:
[array([14.919903  ,  1.9947946 ,  0.65775687], dtype=float32),
 array([ 6.8552303 , -0.18041131,  0.07070862], dtype=float32),
 array([ 1.0139127 ,  0.2854728 , -0.08013925], dtype=float32),
 array([4.438932  , 0.7808308 , 0.05505312], dtype=float32),
 array([7.2354264 , 3.4551375 , 0.30724907], dtype=float32),
 array([5.588563  , 0.6697209 , 0.17777884], dtype=float32),
 array([ 1.150365  ,  0.6162016 , -0.10613517], dtype=float32),
 array([ 8.920831  ,  0.6018489 , -0.06503136], dtype=float32),
 array([1.95457   , 0.17450362, 0.05267046], dtype=float32),
 array([2.5774434 , 0.9776695 , 0.18825674], dtype=float32),
 array([ 2.6093178 ,  0.48708877, -0.17551954], dtype=float32),
 array([ 0.16392124,  0.5478727 , -0.19237904], dtype=float32),
 array([ 4.6288757 ,  0.1531632 , -0.03170557], dtype=float32),
 array([ 1.8586371 ,  0.6651823 , -0.02203152], dtype=float32),
 array([3.2771707 , 3.1532748 , 0.14557752], dtype=float32),
 array([3.8890243 , 2.2504125 , 0.05863352], dtype=float32),
 array([10.092557  ,  0.9448385 ,  0.26969808], dtype=float32),
 array([ 0.27157634,  0.17475206, -0.23231247], dtype=float32),
 array([ 2.3398385 ,  0.6199454 , -0.06315048], dtype=float32),
 array([7.9481864 , 0.86970013, 0.3186779 ], dtype=float32),
 array([ 5.4592905e+00,  3.0020913e-01, -5.3105988e-03], dtype=float32),
 array([ 0.97144395,  0.82078457, -0.06586552], dtype=float32),
 array([ 1.3530452 ,  0.5240793 , -0.06924771], dtype=float32),
 array([1.1931357 , 0.5295599 , 0.20559092], dtype=float32),
 array([5.624632  , 0.8383505 , 0.37541458], dtype=float32),
 array([ 0.78418005,  0.9187632 , -0.0636546 ], dtype=float32),
 array([10.465925  ,  1.8872681 ,  0.38873053], dtype=float32),
 array([ 4.920414  ,  1.9515185 , -0.12888059], dtype=float32),
 array([1.518215  , 1.5924859 , 0.14355288], dtype=float32),
 array([6.9586325, 1.1489052, 0.2852966], dtype=float32),
 array([0.5843046 , 1.45111   , 0.00412361], dtype=float32),
 array([12.129912  ,  1.7866051 ,  0.31929207], dtype=float32),
 array([38.12094  ,  6.549285 ,  1.1005894], dtype=float32),
 array([ 1.2271879 ,  0.2557486 , -0.22623575], dtype=float32),
 array([-0.06689173,  0.0394736 ,  0.631119  ], dtype=float32),
 array([17.32966   ,  2.792189  ,  0.54758376], dtype=float32),
 array([3.3420715 , 0.09269053, 0.02531072], dtype=float32),
 array([1.5794499 , 0.42056152, 0.06615666], dtype=float32),
 array([20.351597 ,  3.7114801,  0.7863975], dtype=float32),
 array([8.772988  , 0.9012797 , 0.20384854], dtype=float32),
 array([0.8031712 , 0.46975204, 0.10056265], dtype=float32),
 array([1.3446803 , 0.8946388 , 0.12165649], dtype=float32),
 array([ 0.32257232, -0.06660413, -0.22496015], dtype=float32),
 array([3.845796  , 0.8221053 , 0.03321841], dtype=float32),
 array([ 0.7769756 ,  0.30658063, -0.3144942 ], dtype=float32),
 array([0.9002108 , 0.38418356, 0.25538492], dtype=float32),
 array([11.137635  ,  1.4070593 ,  0.46713832], dtype=float32),
 array([1.0896404 , 0.3867779 , 0.03269624], dtype=float32),
 array([-0.29543436,  0.58017415, -0.08616602], dtype=float32),
 array([4.886879  , 1.328992  , 0.08463573], dtype=float32),
 array([20.802843 ,  2.5175433,  0.1205664], dtype=float32),
 array([4.472849  , 1.8497019 , 0.07973102], dtype=float32),
 array([3.800993  , 1.2847486 , 0.40869945], dtype=float32),
 array([ 3.2214005,  2.3649635, -0.05755  ], dtype=float32),
 array([6.194131  , 1.039898  , 0.19118609], dtype=float32),
 array([5.946366 , 1.9515687, 0.0739623], dtype=float32),
 array([ 1.548485  , -0.26474452,  0.13542093], dtype=float32),
 array([-0.12953067,  2.0475016 ,  0.12173931], dtype=float32),
 array([ 3.2755911 ,  2.0698051 , -0.03214201], dtype=float32),
 array([ 4.795667 , -0.3839026, -0.324237 ], dtype=float32),
 array([1.4601235 , 0.9413236 , 0.15387204], dtype=float32),
 array([0.60179263, 0.18167558, 0.06993645], dtype=float32),
 array([2.5860176 , 0.96621907, 0.1660994 ], dtype=float32),
 array([2.3293552 , 2.248715  , 0.05637825], dtype=float32),
 array([1.5858288 , 0.75048965, 0.5053718 ], dtype=float32),
 array([4.6874514 , 2.613487  , 0.02177998], dtype=float32),
 array([ 3.015262  ,  1.2428983 , -0.06558037], dtype=float32),
 array([ 5.4304247 ,  1.3663604 , -0.18734889], dtype=float32),
 array([1.169702  , 0.29014575, 0.07055575], dtype=float32),
 array([ 2.785139  ,  1.7807665 , -0.14221995], dtype=float32),
 array([ 6.0665565e+00, -1.1839047e-03, -2.0407777e-01], dtype=float32),
 array([ 4.0390615 ,  1.0952463 , -0.17736901], dtype=float32),
 array([ 2.0545983 , -1.0606133 , -0.20474596], dtype=float32),
 array([14.975636  ,  2.6628957 ,  0.41037458], dtype=float32),
 array([ 1.532108  ,  1.0259324 , -0.02336033], dtype=float32),
 array([ 1.6325457 ,  2.1987557 , -0.23485237], dtype=float32),
 array([ 0.9079408 ,  0.1572775 , -0.20104134], dtype=float32),
 array([ 1.0071435 ,  1.1668189 , -0.06868404], dtype=float32),
 array([ 1.153094  ,  0.40935773, -0.05768288], dtype=float32),
 array([0.5880935 , 0.42007735, 0.12577775], dtype=float32),
 array([8.898152  , 0.9833183 , 0.27929026], dtype=float32),
 array([ 0.46698472,  0.8412469 , -0.2756693 ], dtype=float32),
 array([ 2.401714  ,  1.1422199 , -0.04599947], dtype=float32),
 array([6.7554636 , 0.9809863 , 0.21429788], dtype=float32),
 array([ 2.7404675 ,  0.83549696, -0.06813517], dtype=float32)]
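The raw regression outputs above are continuous and occasionally negative, and the MAPE below is computed directly on them. A minimal sketch (an assumption, not applied in this notebook) of converting them to non-negative integer counts first:

# Sketch only: clip negatives to zero and round to the nearest integer count
int_counts = np.clip(np.rint(np.array(counts)), 0, None).astype(int)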

MAPE

In [12]:
def compute_mape(prediction, truth):
    # Mean absolute percentage error; denominators below 1 are clamped to 1 to avoid division by zero
    prediction, truth = np.asarray(prediction), np.asarray(truth)
    mape = np.mean(np.abs(truth - prediction) / np.maximum(truth, np.ones_like(truth))) * 100
    return mape
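A small worked check with hypothetical values: for a ground truth of [3, 1, 0] and a prediction of [2, 1, 0], the per-class terms are |3-2|/3, |1-1|/1 and |0-0|/max(0, 1), so the MAPE is (1/3 + 0 + 0) / 3 * 100 ≈ 11.11%.

print(compute_mape(np.array([2.0, 1.0, 0.0]), np.array([3.0, 1.0, 0.0])))  # expected: ≈ 11.11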
In [13]:
MAPE = compute_mape(np.array(counts), gt_counts)
In [14]:
print(MAPE)
68.38530732497205

Final Score

In [15]:
if MAPE <= 10:
    print("Score: ", 25*1.0)
elif MAPE <= 15:
    print("Score: ", 25*0.875)
elif MAPE <= 20:
    print("Score: ", 25*0.75)
elif MAPE <= 25:
    print("Score: ", 25*0.625)
elif MAPE <= 30:
    print("Score: ", 25*0.5)
else:
    print("Score: ", 0)
Score:  0