ECMM426-Template/Question 7 - FRCNN.ipynb

548 KiB

None <html> <head> </head>

Question 7 - FasterRCNN

In [1]:
import os
import glob

import cv2
import torch
import torchvision
import numpy as np
from PIL import Image

import pandas as pd
from pathlib import Path

from tqdm import tqdm
from collections import Counter
from xml.etree import ElementTree as ET

from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
D:\Anaconda3\envs\what\lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
In [2]:
MODEL_NAME = 'data/weights_masks.pth'

class_dict = {"with_mask": 1, 
              "without_mask": 2, 
              "mask_weared_incorrect": 3}

class_dct_inv  = {1: 'with_mask', 
                  2: "without_mask", 
                  3: "mask_weared_incorrect"}
In [3]:
import albumentations as A #note installing this changes your version of OpenCV and you loose sift
from albumentations.pytorch import ToTensorV2

# bbox_params = A.BboxParams(
#         format = 'pascal_voc',
#         label_fields = ['labels'],
#         min_visibility = 0.3
# )

# test_transform = A.Compose([
#         A.Resize(height=480, width=480),
# #         A.Normalize(
# #             mean=[0.485, 0.456, 0.406],
# #             std=[0.229, 0.224, 0.225]),
#         ToTensorV2(p=1.0)],
#         bbox_params = bbox_params
# )

bbox_params = A.BboxParams(
    format='pascal_voc',
    min_area=5,  # 5
    min_visibility=0.9,  # 0.9
    #     check_each_transform =True,
    label_fields=['labels']
)

test_transform = A.Compose([
    A.Resize(height=446, width=446),
    ToTensorV2()],
    bbox_params=bbox_params,
    is_check_shapes=False
)

Load the model

In [4]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
In [5]:
def get_model_detection(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
In [6]:
device='cuda' if torch.cuda.is_available() else 'cpu'
model = get_model_detection(4)

model.load_state_dict(torch.load(MODEL_NAME, map_location=torch.device('cpu')), False)
model.eval()
model.to(device)
Out[6]:
FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
        )
      )
      (layer2): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(512)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
      )
      (layer3): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(1024)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (4): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (5): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
      )
      (layer4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(2048)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (fpn): FeaturePyramidNetwork(
      (inner_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
        (2): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
        (3): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
      )
      (layer_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (extra_blocks): LastLevelMaxPool()
    )
  )
  (rpn): RegionProposalNetwork(
    (anchor_generator): AnchorGenerator()
    (head): RPNHead(
      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
      (bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (roi_heads): RoIHeads(
    (box_roi_pool): MultiScaleRoIAlign()
    (box_head): TwoMLPHead(
      (fc6): Linear(in_features=12544, out_features=1024, bias=True)
      (fc7): Linear(in_features=1024, out_features=1024, bias=True)
    )
    (box_predictor): FastRCNNPredictor(
      (cls_score): Linear(in_features=1024, out_features=4, bias=True)
      (bbox_pred): Linear(in_features=1024, out_features=16, bias=True)
    )
  )
)

Load the dataset

In [7]:
from bs4 import BeautifulSoup

class MaskedFaceTestDataset(torch.utils.data.Dataset):
    def __init__(self, path_to_imgs, path_to_xml_files, transform=None):
        self.path_to_imgs = path_to_imgs
        self.path_to_xml_files = path_to_xml_files
        self.transform = transform

        self.file_names = [file for file in os.listdir(self.path_to_imgs) if file.endswith(".png")]

        self.file_paths = [os.path.join(self.path_to_imgs, file_name) for file_name in self.file_names]

    def __getitem__(self, index: int):
        path_to_img = self.file_paths[index]
        path = Path(path_to_img)
        fname = path.stem

        self.name = self.file_names[index]

        image = Image.open(path_to_img).convert('RGB')
        image = np.array(image) / 255.0

        target = self.__generate_target(self.path_to_xml_files, self.name)

        if self.transform:
            augmentations = self.transform(image=image, bboxes=target['boxes'], labels=target['labels'])

            image = augmentations['image']

            target['boxes'] = torch.tensor(augmentations['bboxes'])

            target['labels'] = torch.tensor(augmentations['labels'])

        return image, target, fname

    def __generate_target(self, path_to_xml, name_of_img):
        path_to_annot = os.path.join(path_to_xml, name_of_img.replace('png', 'xml'))

        boxes_lst = []
        labels_lst = []
        clases_lst = []
        target = {}

        with open(path_to_annot, 'r') as file:
            xml_content = file.read()

        soup = BeautifulSoup(xml_content, 'xml')

        objects = soup.find_all('object')

        for obj in objects:
            name = obj.find('name').text
            xmin = int(float(obj.find('xmin').text)) - 1
            ymin = int(float(obj.find('ymin').text)) - 1
            xmax = int(float(obj.find('xmax').text)) - 1
            ymax = int(float(obj.find('ymax').text)) - 1

            labels_lst.append(name)
            boxes_lst.append([xmin, ymin, xmax, ymax])
            clases_lst.append(class_dict[name])

        target["boxes"] = torch.tensor(boxes_lst, dtype=torch.float32)
        target["labels"] = torch.tensor(clases_lst, dtype=torch.int64)

        return target

    def __len__(self):
        return len(self.file_paths)
In [8]:
import xmltodict
from torch.utils.data.sampler import SubsetRandomSampler

def collate_fn(batch):
    return tuple(zip(*batch))

test_ds = MaskedFaceTestDataset(path_to_imgs = './data/MaskedFace/val/',
                               path_to_xml_files = './data/MaskedFace/val/',
                               transform = test_transform)

test_sampler = SubsetRandomSampler( list(range(0, len(test_ds))) )

test_loader = DataLoader(test_ds, batch_size=16, num_workers=0, pin_memory=True,
                             sampler=test_sampler, collate_fn=collate_fn)

Test an image

In [9]:
index = 10
In [10]:
import matplotlib.pyplot as plt

image, target, fname = test_ds[index]

cv_image = np.transpose(image, (1, 2, 0)).numpy()

for box, label in zip(target['boxes'], target['labels']):
    cv2.rectangle(cv_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 0))
    cv2.putText(cv_image, 
                class_dct_inv[label.item()], 
                (int(box[0])-10, int(box[1])-10),
                cv2.FONT_HERSHEY_DUPLEX,
                0.5,
                (255, 255, 0), 
               )

plt.imshow(cv_image)
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [11]:
model.eval()

image, target, fname = test_ds[index]

with torch.no_grad():
    prediction = model(image.unsqueeze(0).to(device, dtype=torch.float))[0]

cv_image = np.transpose(image, (1, 2, 0)).numpy()

for box, label, score in zip(prediction['boxes'], prediction['labels'], prediction['scores']):
    if score >= 0.5:
        cv2.rectangle(cv_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 0))
        cv2.putText(cv_image, 
                    class_dct_inv[label.item()], 
                    (int(box[0])-10, int(box[1])-10),
                    cv2.FONT_HERSHEY_DUPLEX,
                    0.5,
                    (255, 255, 0), 
                   )

plt.imshow(cv_image)
plt.show()
C:\Users\hw630\AppData\Roaming\Python\Python38\site-packages\torchvision\ops\boxes.py:101: UserWarning: This overload of nonzero is deprecated:
	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Evaluation on the Test set

In [12]:
def read_true_object(root):
    for child in root:
        if child.tag == "name":
            label_desc = str(child.text)
            if label_desc == "with_mask":
                return class_dict["with_mask"]
            elif label_desc == "without_mask":
                return class_dict["without_mask"]
            else:
                return class_dict["mask_weared_incorrect"]

def get_true_result(filename):
    wi_m = 0
    wo_m = 0
    inc_m = 0
    tree = ET.parse('./data/MaskedFace/val/' + filename + ".xml")
    root = tree.getroot()
    result = None
    for child in root:
        if child.tag == "object":
            result = read_true_object(child)
            if result == class_dict["with_mask"]:
                wi_m+=1
            elif result == class_dict["without_mask"]:
                wo_m+=1
            elif result == class_dict["mask_weared_incorrect"]:
                inc_m+=1
    return [wi_m, wo_m, inc_m]
In [13]:
pred_list = []
true_list = []

model.eval()

for images, targets, fname in tqdm(test_loader):
    for i in range(len(fname)):
        true_list.append(get_true_result(fname[i]))

    images = list(image.to(device, dtype=torch.float) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    with torch.no_grad():
        prediction = model(images)

    for i in range(len(images)):
        with_m = 0
        with_o_m = 0
        wrong_m = 0

        for pred in range(len(prediction[i]['boxes'])):

            box = prediction[i]['boxes'][pred].cpu().numpy()
            label = prediction[i]['labels'][pred].cpu().numpy()
            score = prediction[i]['scores'][pred].cpu().numpy()

            if score >= 0.5:
                if label.item() == class_dict["with_mask"]:
                    with_m += 1
                elif label.item() == class_dict["without_mask"]:
                    with_o_m += 1
                elif label.item() == class_dict["mask_weared_incorrect"]:
                    wrong_m += 1

        pred_list.append([int(with_m), int(with_o_m), int(wrong_m)])
100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [05:42<00:00, 57.09s/it]

MAPE

In [14]:
def compute_mape(prediction, truth):
    mape = np.mean( np.abs(truth - prediction) / np.maximum(truth, np.ones_like(truth)) ) * 100
    return mape
In [15]:
MAPE = compute_mape( np.array(pred_list), np.array(true_list) )
In [16]:
print(MAPE)
25.12234633384938

Final Score

In [17]:
if MAPE <= 10:
    print("Score: ", 25*1.0)
elif MAPE <= 15:
    print("Score: ", 25*0.875)
elif MAPE <= 20:
    print("Score: ", 25*0.75)
elif MAPE <= 25:
    print("Score: ", 25*0.625)
elif MAPE <= 30:
    print("Score: ", 25*0.5)
else:
    print("Score: ", 0)
Score:  12.5
In [ ]:

</html>