Initial Commit

This commit is contained in:
wuhanstudio 2024-03-11 13:00:35 +00:00
commit 8027439bc1
18 changed files with 1512 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.ipynb_checkpoints
data/EXCV10
data/MaskedFace
__pycache__

BIN
ECMM426.pdf Normal file

Binary file not shown.

676
Question 1.ipynb Normal file

File diff suppressed because one or more lines are too long

419
Question 2.ipynb Normal file
View File

@ -0,0 +1,419 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "f96d2a83",
"metadata": {},
"source": [
"## Question 2 (20 marks)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6f53891a",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d3bbb31a",
"metadata": {},
"outputs": [],
"source": [
"# Vocabulary size: the number of k-means clusters requested from get_clusters for each image\n",
"n_clusters = 100"
]
},
{
"cell_type": "markdown",
"id": "28068e50",
"metadata": {},
"source": [
"Read images"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "87dd5c72",
"metadata": {},
"outputs": [],
"source": [
"im_book = cv2.imread('data/books.jpg', cv2.IMREAD_GRAYSCALE)\n",
"im_mount = cv2.imread('data/mount_rushmore_1.jpg', cv2.IMREAD_GRAYSCALE)\n",
"im_notre = cv2.imread('data/notre_dame_1.jpg', cv2.IMREAD_GRAYSCALE)\n",
"\n",
"# cv2.imread returns None instead of raising when a file is missing or unreadable,\n",
"# so check here rather than failing later with an unrelated-looking error.\n",
"assert all(im is not None for im in (im_book, im_mount, im_notre)), 'failed to read an input image from data/'"
]
},
{
"cell_type": "markdown",
"id": "79c57454",
"metadata": {},
"source": [
"## Generate Clusters"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2e571277",
"metadata": {},
"outputs": [],
"source": [
"def get_features(image):\n",
"    \"\"\"\n",
"    Detect SIFT keypoints in an image and compute their descriptors.\n",
"\n",
"    Parameters:\n",
"        image (numpy.ndarray): Greyscale image of shape (H, W). An array that already\n",
"            has a channel axis, shape (H, W, C), is passed to SIFT unchanged.\n",
"\n",
"    Returns:\n",
"        tuple: (keypoints, descriptors) exactly as returned by detectAndCompute.\n",
"\n",
"    Raises:\n",
"        ValueError: If image is None, which is what cv2.imread returns for a bad path.\n",
"    \"\"\"\n",
"    if image is None:\n",
"        raise ValueError('image is None - check the path given to cv2.imread')\n",
"\n",
"    # Add the trailing channel axis only when it is missing\n",
"    if image.ndim == 2:\n",
"        image = image[:, :, np.newaxis]\n",
"\n",
"    # Initialize a SIFT detector\n",
"    sift = cv2.SIFT_create()\n",
"\n",
"    # Detect keypoints and compute descriptors\n",
"    keypoints, descriptors = sift.detectAndCompute(image, None)\n",
"\n",
"    return keypoints, descriptors"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "12d042c8",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"def get_clusters(keypoints, descriptors, n_clusters=100):\n",
"    \"\"\"\n",
"    Quantise descriptors into visual words and return them with the keypoint positions.\n",
"\n",
"    Parameters:\n",
"        keypoints: Iterable of keypoint objects exposing a .pt attribute.\n",
"        descriptors: Descriptor array the k-means model is fitted on and then applied to.\n",
"        n_clusters (int): Number of k-means clusters.\n",
"\n",
"    Returns:\n",
"        tuple: (clusters, locations) - the cluster id predicted for every descriptor and\n",
"        an int64 array built from the keypoints' .pt values.\n",
"    \"\"\"\n",
"    # Keypoint positions; the int64 cast discards the fractional part of each coordinate\n",
"    points = [keypoint.pt for keypoint in keypoints]\n",
"    locations = np.array(points, dtype=np.int64)\n",
"\n",
"    # Fit the vocabulary (fixed seed) and label the same descriptors with it\n",
"    vocabulary = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)\n",
"    vocabulary.fit(descriptors)\n",
"    clusters = vocabulary.predict(descriptors)\n",
"\n",
"    return clusters, locations"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b04681a0",
"metadata": {},
"outputs": [],
"source": [
"# SIFT keypoints and descriptors for each of the three test images\n",
"kpts_book, des_book = get_features(im_book)\n",
"kpts_mount, des_mount = get_features(im_mount)\n",
"kpts_notre, des_notre = get_features(im_notre)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "70d23c90",
"metadata": {},
"outputs": [],
"source": [
"# Each call fits its own k-means model, so cluster ids are not comparable across images\n",
"clusters_book, locations_book = get_clusters(kpts_book, des_book, n_clusters=n_clusters)\n",
"clusters_mount, locations_mount = get_clusters(kpts_mount, des_mount, n_clusters=n_clusters)\n",
"clusters_notre, locations_notre = get_clusters(kpts_notre, des_notre, n_clusters=n_clusters)"
]
},
{
"cell_type": "markdown",
"id": "1518e19f",
"metadata": {},
"source": [
"## Method 1 (Two FOR loops)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "04f3901b",
"metadata": {},
"outputs": [],
"source": [
"def m1_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"\n",
"    Create bag of visual words representation of an image based on the division information.\n",
"\n",
"    The image is cut into division[0] x division[1] cells whose size is the image size\n",
"    floor-divided by the division count. The last row and column of cells extend to the\n",
"    image border, so keypoints lying in the remainder pixels are counted, not dropped.\n",
"\n",
"    Parameters:\n",
"        im (numpy.ndarray): Image array of data type uint8.\n",
"        locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
"        clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id.\n",
"        division (list): List of integers of length 2 indicating division along Y and X axes.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: 1-dimensional int64 array of length division[0] * division[1] * K,\n",
"        with K = largest cluster id + 1. Cells are stored row by row (Y outer, X inner),\n",
"        each as K consecutive per-cluster counts.\n",
"    \"\"\"\n",
"    locations = np.asarray(locations)\n",
"    clusters = np.asarray(clusters, dtype=np.int64)\n",
"    n_div_y, n_div_x = division[0], division[1]\n",
"    height, width = im.shape[0], im.shape[1]\n",
"\n",
"    # Determine the size of each division\n",
"    div_height = height // n_div_y\n",
"    div_width = width // n_div_x\n",
"\n",
"    # Size by the largest id, not the number of distinct ids, so that an id missing\n",
"    # from this image cannot shift or drop the counts of higher ids.\n",
"    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
"    histogram = np.zeros((n_div_y * n_div_x * num_clusters,), dtype=np.int64)\n",
"    if num_clusters == 0:\n",
"        return histogram\n",
"\n",
"    # Two FOR loops\n",
"    for div_y in range(n_div_y):\n",
"        for div_x in range(n_div_x):\n",
"            # Bounds of the current division; the last one absorbs the remainder pixels\n",
"            y_start = div_y * div_height\n",
"            x_start = div_x * div_width\n",
"            y_end = height if div_y == n_div_y - 1 else y_start + div_height\n",
"            x_end = width if div_x == n_div_x - 1 else x_start + div_width\n",
"\n",
"            # Find features within the current division\n",
"            div_mask = (\n",
"                (locations[:, 1] >= y_start) & (locations[:, 1] < y_end)\n",
"                & (locations[:, 0] >= x_start) & (locations[:, 0] < x_end)\n",
"            )\n",
"\n",
"            # Count every cluster id of the division in one call\n",
"            offset = (div_y * n_div_x + div_x) * num_clusters\n",
"            histogram[offset:offset + num_clusters] = np.bincount(\n",
"                clusters[div_mask], minlength=num_clusters\n",
"            )\n",
"\n",
"    return histogram"
]
},
{
"cell_type": "markdown",
"id": "0f64e74a",
"metadata": {},
"source": [
"## Method 2 (One FOR loop)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a597d45d",
"metadata": {},
"outputs": [],
"source": [
"def m2_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"\n",
"    Build the BoVW spatial histogram with a single pass over the keypoints.\n",
"\n",
"    Cell boundaries lie at multiples of the integer cell size (image size // division\n",
"    count); the remainder pixels belong to the last row/column of cells.\n",
"\n",
"    Parameters:\n",
"        im (numpy.ndarray): Image array; only its first two dimensions are used.\n",
"        locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
"        clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id.\n",
"        division (list): List of integers of length 2 indicating division along Y and X axes.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: 1-dimensional int64 array of length division[0] * division[1] * K,\n",
"        with K = largest cluster id + 1 (length 0 when there are no keypoints).\n",
"    \"\"\"\n",
"    height, width = np.shape(im)[0], np.shape(im)[1]\n",
"\n",
"    ## Possible Mistakes: Some students swapped x and y\n",
"    div_x = division[1]\n",
"    div_y = division[0]\n",
"\n",
"    # Integer cell sizes avoid float rounding at the cell boundaries\n",
"    x_size = width // div_x\n",
"    y_size = height // div_y\n",
"\n",
"    num_divisions = div_x * div_y\n",
"\n",
"    clusters = np.asarray(clusters, dtype=np.int64)\n",
"    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
"\n",
"    histogram = np.zeros(num_clusters * num_divisions, dtype=np.int64)\n",
"\n",
"    # One FOR loop\n",
"    for point, cluster in zip(locations, clusters):\n",
"        # Clamp to the last cell so the remainder pixels are not pushed out of range;\n",
"        # a zero cell size (more divisions than pixels) leaves only the last cell.\n",
"        x_div = div_x - 1 if x_size == 0 else min(int(point[0]) // x_size, div_x - 1)\n",
"        y_div = div_y - 1 if y_size == 0 else min(int(point[1]) // y_size, div_y - 1)\n",
"\n",
"        # Possible Mistakes: Some students miscalculated the boundary condition, e.g.\n",
"        # x_div = np.ceil(point[0] / x_size).astype(np.int64) - 1   (gives -1 for x == 0)\n",
"\n",
"        # Calculate the array position\n",
"        div = x_div + (y_div * div_x)\n",
"        array_pos = (div * num_clusters) + cluster\n",
"\n",
"        # Update the histogram\n",
"        histogram[array_pos] += 1\n",
"\n",
"    return histogram"
]
},
{
"cell_type": "markdown",
"id": "36f2fe5e",
"metadata": {},
"source": [
"## Put students' implementations here"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3804d695",
"metadata": {},
"outputs": [],
"source": [
"# Be careful, some students used a different function name (e.g. bowv rather than bovw)\n",
"def generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"\n",
"    Count cluster ids per image cell and return the counts as one flat int64 array.\n",
"\n",
"    The image is split into division[0] rows by division[1] columns of cells; the last\n",
"    row and column run to the image border. Cells are stored row by row, each as one\n",
"    count per distinct cluster id.\n",
"    \"\"\"\n",
"    n_rows, n_cols = division[0], division[1]\n",
"    img_h, img_w = im.shape[0], im.shape[1]\n",
"\n",
"    # One histogram slot per distinct cluster id and cell\n",
"    n_words = np.unique(clusters).size\n",
"    result = np.zeros(n_words * np.prod(division), dtype=np.int64)\n",
"\n",
"    cell_h = img_h // n_rows\n",
"    cell_w = img_w // n_cols\n",
"\n",
"    for cell in range(n_rows * n_cols):\n",
"        row, col = divmod(cell, n_cols)\n",
"        top = row * cell_h\n",
"        left = col * cell_w\n",
"        bottom = img_h if row == n_rows - 1 else top + cell_h\n",
"        right = img_w if col == n_cols - 1 else left + cell_w\n",
"        offset = cell * n_words\n",
"        for (x, y), word in zip(locations, clusters):\n",
"            if top <= y < bottom and left <= x < right:\n",
"                result[offset + word] += 1\n",
"    return result"
]
},
{
"cell_type": "markdown",
"id": "0d516e6a",
"metadata": {},
"source": [
"## Test (Should output ALL PASS)"
]
},
{
"cell_type": "markdown",
"id": "a269b35f",
"metadata": {},
"source": [
"Restart and Run ALL for each submission"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "95d72a5c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Testing division: [1, 1]\n",
"PASS: Book\n",
"PASS: Mount\n",
"PASS: Notre\n",
"Testing division: [2, 2]\n",
"PASS: Book\n",
"PASS: Mount\n",
"PASS: Notre\n",
"Testing division: [2, 3]\n",
"PASS: Book\n",
"PASS: Mount\n",
"PASS: Notre\n",
"ALL PASS\n"
]
}
],
"source": [
"# (message printed on success, image, keypoint locations, cluster ids)\n",
"samples = [\n",
"    ('PASS: Book', im_book, locations_book, clusters_book),\n",
"    ('PASS: Mount', im_mount, locations_mount, clusters_mount),\n",
"    ('PASS: Notre', im_notre, locations_notre, clusters_notre),\n",
"]\n",
"\n",
"histograms = []\n",
"for division in [ [1, 1], [2, 2], [2, 3] ]:\n",
"    print('Testing division:', division)\n",
"\n",
"    # Run every implementation on every image before checking anything\n",
"    m1_results = [m1_generate_bovw_spatial_histogram(im, loc, clu, division) for _, im, loc, clu in samples]\n",
"    m2_results = [m2_generate_bovw_spatial_histogram(im, loc, clu, division) for _, im, loc, clu in samples]\n",
"\n",
"    # Students' implementations\n",
"    student_results = [generate_bovw_spatial_histogram(im, loc, clu, division) for _, im, loc, clu in samples]\n",
"\n",
"    for sample, reference, alternative, submitted in zip(samples, m1_results, m2_results, student_results):\n",
"        assert np.allclose(reference, alternative)\n",
"        assert np.allclose(reference, submitted)\n",
"        print(sample[0])\n",
"\n",
"    histograms.append(m1_results)\n",
"print('ALL PASS')"
]
},
{
"cell_type": "markdown",
"id": "00a11df8",
"metadata": {},
"source": [
"## Save Output"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ea1fadb1",
"metadata": {},
"outputs": [],
"source": [
"# The histograms have a different length for every division (divisions * clusters), so the\n",
"# nested list is ragged and must be wrapped in an explicit object array before saving.\n",
"# NOTE(review): the file name says question_3 but this is the Question 2 notebook - confirm.\n",
"np.save('data/question_3_histogram.npy', np.array(histograms, dtype=object))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddc3036c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "what",
"language": "python",
"name": "what"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

224
Question 3.ipynb Normal file
View File

@ -0,0 +1,224 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "924a2a50",
"metadata": {},
"source": [
"## Question 3 (10 marks)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2b3d1ba2",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "478bad7a",
"metadata": {},
"outputs": [],
"source": [
"# Cast to uint8, the data type compute_rotation_matrix documents for its points argument\n",
"points = np.load('data/points.npy').astype(np.uint8)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e07b0285",
"metadata": {},
"outputs": [],
"source": [
"def compute_rotation_matrix(points, theta):\n",
"    \"\"\"\n",
"    Build the 3 x 3 homogeneous matrix that rotates a 2-D shape anticlockwise by theta\n",
"    degrees about its centre, where the centre is the mean of the given points.\n",
"\n",
"    Parameters:\n",
"        points: 2-dimensional numpy array of data type uint8 with shape k x 2. Each row\n",
"            is a Cartesian coordinate (x, y).\n",
"\n",
"        theta: a floating-point number denoting the angle of rotation in degree.\n",
"\n",
"    Returns:\n",
"        A 2-dimensional numpy array of data type float64 with shape 3 x 3.\n",
"    \"\"\"\n",
"    angle = np.radians(theta)\n",
"    cos_t = np.cos(angle)\n",
"    sin_t = np.sin(angle)\n",
"\n",
"    # Centre of the shape: mean x and mean y\n",
"    centre = np.mean(points, axis=0)\n",
"\n",
"    # Step 1: move the centre onto the origin\n",
"    to_origin = np.eye(3, dtype=np.float64)\n",
"    to_origin[0, 2] = -centre[0]\n",
"    to_origin[1, 2] = -centre[1]\n",
"\n",
"    # Step 3: move the centre back to where it was\n",
"    from_origin = np.eye(3, dtype=np.float64)\n",
"    from_origin[0, 2] = centre[0]\n",
"    from_origin[1, 2] = centre[1]\n",
"\n",
"    # Step 2: anticlockwise rotation about the origin\n",
"    spin = np.array([[cos_t, -sin_t, 0],\n",
"                     [sin_t, cos_t, 0],\n",
"                     [0, 0, 1]], dtype=np.float64)\n",
"\n",
"    # Matrices apply right to left: translate, rotate, translate back\n",
"    return from_origin.dot(spin).dot(to_origin)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "16ef4247",
"metadata": {},
"outputs": [],
"source": [
"# Reference matrices for every angle from 0 to 360 degrees in steps of 5\n",
"rotation_matrices = [compute_rotation_matrix(points, t) for t in range(0, 365, 5)]"
]
},
{
"cell_type": "markdown",
"id": "a130c201",
"metadata": {},
"source": [
"## Save Output"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "23967b5d",
"metadata": {},
"outputs": [],
"source": [
"# Persist the list of reference matrices built above\n",
"np.save('data/question_3_rotation_matrices.npy', rotation_matrices)"
]
},
{
"cell_type": "markdown",
"id": "a808d8a4",
"metadata": {},
"source": [
"## Put students' implementations here"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b0836f8d",
"metadata": {},
"outputs": [],
"source": [
"def compute_rotation_matrix(points, theta):\n",
"    \"\"\"\n",
"    Return the 3 x 3 float64 homogeneous matrix rotating the shape anticlockwise by\n",
"    theta degrees about the mean of points (a k x 2 array of (x, y) rows).\n",
"    \"\"\"\n",
"    # Convert points to float64\n",
"    points = points.astype(np.float64)\n",
"    # Calculate centre\n",
"    centre = np.mean(points, axis=0)\n",
"    # Evaluate the trigonometric terms once\n",
"    theta_rad = np.radians(theta)\n",
"    cos_t, sin_t = np.cos(theta_rad), np.sin(theta_rad)\n",
"    # Compute rotation matrix\n",
"    rotation_matrix = np.array([[cos_t, -sin_t, 0],\n",
"                                [sin_t, cos_t, 0],\n",
"                                [0, 0, 1]])\n",
"    # Translation matrix to origin\n",
"    translation_to_origin = np.array([[1, 0, -centre[0]],\n",
"                                      [0, 1, -centre[1]],\n",
"                                      [0, 0, 1]])\n",
"    # Translation matrix to original position\n",
"    translation_to_centre = np.array([[1, 0, centre[0]],\n",
"                                      [0, 1, centre[1]],\n",
"                                      [0, 0, 1]])\n",
"    # Combine transformations with data type float64\n",
"    combined_matrix = np.dot(np.dot(translation_to_centre, rotation_matrix), translation_to_origin).astype(np.float64)\n",
"    return combined_matrix"
]
},
{
"cell_type": "markdown",
"id": "73b68192",
"metadata": {},
"source": [
"## Test (Should output ALL PASS)"
]
},
{
"cell_type": "markdown",
"id": "1c0a88a6",
"metadata": {},
"source": [
"Restart and Run ALL for each submission"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "132d734b",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"73 PASS\n",
"ALL PASS\n"
]
}
],
"source": [
"# Count the angles for which the submission reproduces the stored reference matrix\n",
"angles = range(0, 365, 5)\n",
"n_pass = sum(\n",
"    1 for t in angles\n",
"    if np.allclose(compute_rotation_matrix(points, t), rotation_matrices[t // 5])\n",
")\n",
"\n",
"print(n_pass, 'PASS')\n",
"assert n_pass == len(rotation_matrices)\n",
"print('ALL PASS')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fac308a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "what",
"language": "python",
"name": "what"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

2
README.md Normal file
View File

@ -0,0 +1,2 @@
## ECMM426 Template

187
ca_utils.py Normal file
View File

@ -0,0 +1,187 @@
import cv2
import math
import torch
import pickle
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
def im2single(im):
    """Return a float32 copy of ``im`` with every value divided by 255."""
    as_float = im.astype(np.float32)
    return as_float / 255
def single2im(im):
    """
    Scale an image by 255 and convert it to uint8.

    The result is a new array; the argument is left unmodified (the previous
    in-place ``im *= 255`` rescaled the caller's array as a side effect).
    The uint8 cast truncates fractional values rather than rounding them.

    Args:
    - im: A numpy array, typically the float output of im2single

    Returns:
    - A new numpy array of dtype uint8 with the same shape as ``im``
    """
    return (im * 255).astype(np.uint8)
def load_interest_points(eval_file, scale_factor=1.0):
    """
    This function is provided for development and debugging but cannot be used in
    the final handin. It 'cheats' by generating interest points from known
    correspondences. It will only work for the 3 image pairs with known
    correspondences.

    Args:
    - eval_file: string representing the file path to the list of known correspondences
    - scale_factor: Python float representing the scale needed to map from the original
      image coordinates to the resolution being used for the current experiment.
      Defaults to 1.0 (coordinates returned unchanged).

    Returns:
    - x1: A numpy array of shape (k,) containing ground truth x-coordinates of imgA correspondence pts
    - y1: A numpy array of shape (k,) containing ground truth y-coordinates of imgA correspondence pts
    - x2: A numpy array of shape (k,) containing ground truth x-coordinates of imgB correspondence pts
    - y2: A numpy array of shape (k,) containing ground truth y-coordinates of imgB correspondence pts
    """
    # NOTE(security): pickle.load can execute arbitrary code - only open trusted files.
    with open(eval_file, 'rb') as f:
        d = pickle.load(f, encoding='latin1')

    return tuple(d[key] * scale_factor for key in ('x1', 'y1', 'x2', 'y2'))
def show_interest_points(img, X, Y):
    """
    Draw a filled circle of radius 10 at every interest point, each in a random colour.

    Args:
    - img: A numpy array of shape (M,N,C)
    - X: A numpy array of shape (k,) containing x-locations of interest points
    - Y: A numpy array of shape (k,) containing y-locations of interest points

    Returns:
    - newImg: A numpy array of shape (M,N,C): a copy of the input image with
      coloured circles plotted at the keypoints
    """
    canvas = img.copy()
    centres = zip(X.astype(int), Y.astype(int))
    for px, py in centres:
        # Three uniform samples in [0, 1); presumably intended for float images - TODO confirm
        colour = np.random.rand(3)
        canvas = cv2.circle(canvas, (int(px), int(py)), 10, colour, -1)
    return canvas
def conv3x3(in_planes, out_planes, stride=1):
    """
    Build a bias-free 3x3 convolution with padding 1
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """
    Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block input (passed through ``downsample`` when one is given) is added to the
    output of the second batch norm before the final ReLU.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution applies the stride
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip connection: identity unless a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """
    Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 convolution outputs ``planes * 4`` channels. The block input (passed
    through ``downsample`` when one is given) is added before the final ReLU.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution applies the stride
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = x
        # The first two stages share the conv -> batch norm -> ReLU pattern
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            out = self.relu(norm(conv(out)))
        out = self.bn3(self.conv3(out))

        # Skip connection: identity unless a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """
    Three-stage residual network: a 3x3 stem convolution, three stacks of residual
    blocks (the second and third with stride 2) and, when ``flatten`` is set, global
    average pooling followed by a linear classifier.

    Args:
    - block: residual block class; must expose an ``expansion`` attribute and accept
      ``(inplanes, planes, stride, downsample)``
    - layers: sequence of three ints, the number of blocks in each stage
    - in_channels: number of channels of the input images
    - channels: sequence of three ints, the ``planes`` value of each stage
    - num_classes: output size of the linear classifier
    - flatten: if True, forward() returns class scores; otherwise it returns the
      feature map produced by the last stage
    """

    def __init__(self, block, layers, in_channels=3, channels=[16, 32, 64], num_classes=10, flatten=True):
        super(ResNet, self).__init__()
        self.name = "resnet"
        self.flatten = flatten
        # Copy so the instance never aliases the shared mutable default list
        self.channels = list(channels)
        self.inplanes = channels[0]
        self.conv1 = nn.Conv2d(in_channels, channels[0], kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0])
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, channels[0], layers[0])
        self.layer2 = self._make_layer(block, channels[1], layers[1], stride=2)
        self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)  # global pooling

        # The last stage emits channels[2] * block.expansion feature maps, so the
        # classifier must take that many inputs (channels[2] alone is only correct
        # for blocks with expansion 1).
        out_features = channels[2] * block.expansion
        self.fc = nn.Linear(out_features, num_classes)  # classifier
        if flatten:
            self.feature_size = out_features

        # He-style normal init for convolutions, unit scale / zero shift for batch norm
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one strides / changes width."""
        downsample = None
        # A 1x1 projection is needed whenever the skip path would not match the
        # block output in resolution or channel count
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion)
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        if self.flatten:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x

BIN
data/books.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

BIN
data/mask.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 KiB

BIN
data/mount_rushmore_1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

BIN
data/notre_dame_1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 926 KiB

BIN
data/points.npy Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
data/question_3.npy Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
data/shapes.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB