# ECMM426-Template/ca_utils.py

import cv2
import math
import torch
import pickle
import numpy as np
import torch.nn as nn
import torch.nn.functional as F


def im2single(im):
    """Convert a uint8 image in [0, 255] to a float32 image in [0, 1]."""
    im = im.astype(np.float32) / 255
    return im


def single2im(im):
    """Scale a float image in [0, 1] back to uint8 in [0, 255] (note: scales the input in place)."""
    im *= 255
    im = im.astype(np.uint8)
    return im
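

# Minimal usage sketch for the two converters above: im2single gives float32 in
# [0, 1], single2im maps back to uint8. Pass a copy to single2im if the float
# image is still needed, since it scales its argument in place.
def _demo_image_conversion():
    img_u8 = (np.random.rand(8, 8, 3) * 255).astype(np.uint8)  # dummy uint8 image
    img_f = im2single(img_u8)
    assert img_f.dtype == np.float32 and img_f.max() <= 1.0
    img_back = single2im(img_f.copy())  # copy: single2im modifies its input
    assert img_back.dtype == np.uint8 and img_back.shape == img_u8.shape
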

def load_interest_points(eval_file):
    """
    This function is provided for development and debugging but cannot be used in
    the final handin. It 'cheats' by generating interest points from known
    correspondences. It will only work for the 3 image pairs with known
    correspondences.

    Args:
    - eval_file: string representing the file path to the list of known correspondences

    Returns:
    - x1: A numpy array of shape (k,) containing ground truth x-coordinates of imgA correspondence pts
    - y1: A numpy array of shape (k,) containing ground truth y-coordinates of imgA correspondence pts
    - x2: A numpy array of shape (k,) containing ground truth x-coordinates of imgB correspondence pts
    - y2: A numpy array of shape (k,) containing ground truth y-coordinates of imgB correspondence pts
    """
    with open(eval_file, 'rb') as f:
        d = pickle.load(f, encoding='latin1')
    # Correspondences are used at the original image resolution, so no rescaling is applied.
    scale_factor = 1.0
    return d['x1'] * scale_factor, d['y1'] * scale_factor, d['x2'] * scale_factor, d['y2'] * scale_factor
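

# Usage sketch; the path below is only a placeholder for one of the evaluation
# pickles shipped with the assignment data:
#
#   x1, y1, x2, y2 = load_interest_points('data/some_pair_eval_points.pkl')
#
# Each returned array has shape (k,), one entry per known correspondence.
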

def show_interest_points(img, X, Y):
    """
    Visualize interest points on an image with random colors

    Args:
    - img: A numpy array of shape (M,N,C)
    - X: A numpy array of shape (k,) containing x-locations of interest points
    - Y: A numpy array of shape (k,) containing y-locations of interest points

    Returns:
    - newImg: A numpy array of shape (M,N,C) showing the original image with
      colored circles at keypoints plotted on top of it
    """
    newImg = img.copy()
    for x, y in zip(X.astype(int), Y.astype(int)):
        # Random colour in [0, 1], converted to a plain list because OpenCV
        # drawing functions do not accept numpy arrays as the colour argument.
        cur_color = np.random.rand(3).tolist()
        newImg = cv2.circle(newImg, (int(x), int(y)), 10, cur_color, -1)
    return newImg
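

# Small self-contained sketch: draw random interest points on a random float
# image. The colours lie in [0, 1], so this is intended for float images such
# as the output of im2single.
def _demo_show_interest_points():
    img = np.random.rand(128, 128, 3).astype(np.float32)  # dummy float image
    X = np.random.randint(0, 128, size=20)
    Y = np.random.randint(0, 128, size=20)
    vis = show_interest_points(img, X, Y)
    assert vis.shape == img.shape
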

def conv3x3(in_planes, out_planes, stride=1):
    """
    3x3 convolution with padding
    """
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    """Two 3x3 convolutions with a residual (identity or downsampled) connection."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        else:
            residual = x
        out += residual
        out = self.relu(out)
        return out
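

# Shape sanity-check sketch: with stride=1 and no downsample, a BasicBlock maps
# (N, C, H, W) to (N, C, H, W).
def _demo_basic_block():
    block = BasicBlock(16, 16)
    y = block(torch.randn(2, 16, 32, 32))
    assert y.shape == (2, 16, 32, 32)
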

class Bottleneck(nn.Module):
    """1x1 reduce, 3x3, then 1x1 expand (by `expansion`) with a residual connection."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        else:
            residual = x
        out += residual
        out = self.relu(out)
        return out
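

# Shape sanity-check sketch: a Bottleneck multiplies its channel count by
# `expansion` (4), so the skip path needs the same kind of 1x1 projection that
# ResNet._make_layer below constructs whenever the shapes differ.
def _demo_bottleneck():
    downsample = nn.Sequential(
        nn.Conv2d(16, 16 * Bottleneck.expansion, kernel_size=1, stride=2, bias=False),
        nn.BatchNorm2d(16 * Bottleneck.expansion),
    )
    block = Bottleneck(16, 16, stride=2, downsample=downsample)
    y = block(torch.randn(2, 16, 32, 32))
    assert y.shape == (2, 64, 16, 16)
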

class ResNet(nn.Module):
    """Small CIFAR-style ResNet: a 3x3 stem, three residual stages, global average
    pooling and a linear classifier. With flatten=False the final feature map is
    returned instead of class logits."""

    def __init__(self, block, layers, in_channels=3, channels=[16, 32, 64], num_classes=10, flatten=True):
        super(ResNet, self).__init__()
        self.name = "resnet"
        self.flatten = flatten
        self.channels = channels
        self.inplanes = channels[0]
        self.conv1 = nn.Conv2d(in_channels, channels[0], kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0])
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, channels[0], layers[0])
        self.layer2 = self._make_layer(block, channels[1], layers[1], stride=2)
        self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)  # global average pooling
        self.fc = nn.Linear(channels[2] * block.expansion, num_classes)  # classifier head
        if flatten:
            self.feature_size = channels[2] * block.expansion
        # He-style initialisation for convolutions, unit scale / zero shift for batch norm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the skip connection with a 1x1 convolution when the spatial
            # size or channel count of the residual changes.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion)
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        if self.flatten:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x
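

# Minimal smoke-test sketch: ResNet(BasicBlock, [3, 3, 3]) with the default
# channels corresponds to a small CIFAR-style ResNet-20; a forward pass on a
# dummy 32x32 RGB batch should return one logit vector per image.
if __name__ == "__main__":
    model = ResNet(BasicBlock, [3, 3, 3], in_channels=3, num_classes=10)
    dummy = torch.randn(4, 3, 32, 32)  # batch of 4 RGB 32x32 images
    logits = model(dummy)
    print(logits.shape)  # expected: torch.Size([4, 10])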