420 lines
13 KiB
Plaintext
420 lines
13 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f96d2a83",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Question 2 (20 marks)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "6f53891a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import cv2\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "d3bbb31a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"n_clusters = 100  # visual vocabulary size: passed as n_clusters to every get_clusters() call below"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "28068e50",
|
|
"metadata": {},
|
|
"source": [
|
|
"Read images"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "87dd5c72",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def _read_grayscale(path):\n",
"    \"\"\"Load the image at `path` as a single-channel grayscale array.\n",
"\n",
"    cv2.imread does not raise on a missing or unreadable file; it returns None,\n",
"    which would otherwise only surface later as an obscure indexing error.\n",
"\n",
"    Parameters:\n",
"        path (str): Location of the image file.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: The decoded grayscale image.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: If OpenCV could not read the file.\n",
"    \"\"\"\n",
"    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)\n",
"    if image is None:\n",
"        raise FileNotFoundError(f'Could not read image: {path}')\n",
"    return image\n",
"\n",
"\n",
"im_book = _read_grayscale('data/books.jpg')\n",
"im_mount = _read_grayscale('data/mount_rushmore_1.jpg')\n",
"im_notre = _read_grayscale('data/notre_dame_1.jpg')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "79c57454",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Generate Clusters"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "2e571277",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_features(image):\n",
"    \"\"\"Run SIFT keypoint detection and description on an image.\n",
"\n",
"    Parameters:\n",
"        image (numpy.ndarray): Image array; presumably the 2-D grayscale arrays\n",
"            loaded earlier in this notebook. A trailing axis is appended before\n",
"            the array is handed to OpenCV.\n",
"\n",
"    Returns:\n",
"        tuple: (keypoints, descriptors) as produced by detectAndCompute.\n",
"    \"\"\"\n",
"    # Append a trailing channel axis before passing the array to the detector\n",
"    single_channel = image[:, :, np.newaxis]\n",
"\n",
"    # Fresh SIFT detector with default settings; None means no mask is applied\n",
"    detector = cv2.SIFT_create()\n",
"    return detector.detectAndCompute(single_channel, None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "12d042c8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.cluster import KMeans\n",
|
|
"\n",
|
|
"def get_clusters(keypoints, descriptors, n_clusters=100):\n",
"    \"\"\"Quantise descriptors into cluster ids with k-means and collect keypoint positions.\n",
"\n",
"    Parameters:\n",
"        keypoints: Iterable of objects exposing a `.pt` (x, y) attribute.\n",
"        descriptors: Feature matrix the k-means model is fitted on and then labels.\n",
"        n_clusters (int): Number of clusters requested from k-means.\n",
"\n",
"    Returns:\n",
"        tuple: (clusters, locations) where clusters holds the predicted cluster id of\n",
"            every descriptor and locations is an int64 array built from each `.pt`.\n",
"    \"\"\"\n",
"    # Fit on the supplied descriptors; the fixed random_state keeps runs repeatable\n",
"    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(descriptors)\n",
"\n",
"    # Label each descriptor with the cluster the fitted model predicts for it\n",
"    labels = model.predict(descriptors)\n",
"\n",
"    # Keypoint positions as (x, y); the int64 conversion discards the fractional part\n",
"    points = [keypoint.pt for keypoint in keypoints]\n",
"    return labels, np.array(points, dtype=np.int64)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "b04681a0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Extract keypoints and descriptors for the three images, in book / mount / notre order\n",
"(kpts_book, des_book), (kpts_mount, des_mount), (kpts_notre, des_notre) = (\n",
"    get_features(image) for image in (im_book, im_mount, im_notre)\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "70d23c90",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Cluster each image's descriptors independently, in book / mount / notre order\n",
"(clusters_book, locations_book), (clusters_mount, locations_mount), (clusters_notre, locations_notre) = (\n",
"    get_clusters(kpts, des, n_clusters=n_clusters)\n",
"    for kpts, des in ((kpts_book, des_book), (kpts_mount, des_mount), (kpts_notre, des_notre))\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1518e19f",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Method 1 (Two FOR loops)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "04f3901b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def m1_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"\n",
"    Create bag of visual words representation of an image based on the division information.\n",
"\n",
"    The image is split into division[0] x division[1] cells. Along an axis of length L\n",
"    split into d parts, cell k covers pixels [k * L // d, (k + 1) * L // d), so the cells\n",
"    tile the whole image even when L is not a multiple of d (no feature near the bottom or\n",
"    right edge is dropped). When L is a multiple of d this is the usual equal-sized grid.\n",
"    Per-cell cluster counts are concatenated row by row (Y-major, then X), each cell\n",
"    contributing num_clusters consecutive entries.\n",
"\n",
"    Parameters:\n",
"        im (numpy.ndarray): Image array of data type uint8.\n",
"        locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
"        clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id (non-negative).\n",
"        division (list): List of integers of length 2 indicating division along Y and X axes.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: 1-dimensional int64 array of length\n",
"            division[0] * division[1] * num_clusters representing the BoVW spatial histogram,\n",
"            where num_clusters is the largest cluster id plus one.\n",
"    \"\"\"\n",
"    height, width = im.shape[0], im.shape[1]\n",
"\n",
"    # Normalise the inputs so an empty feature set still has a usable (0, 2) shape\n",
"    locations = np.asarray(locations).reshape(-1, 2)\n",
"    clusters = np.asarray(clusters, dtype=np.int64).ravel()\n",
"\n",
"    # Size the vocabulary from the largest id: counting distinct ids would under-size the\n",
"    # histogram (and lose counts) whenever some cluster id has no member\n",
"    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
"    histogram = np.zeros((division[0] * division[1] * num_clusters,), dtype=np.int64)\n",
"    if num_clusters == 0:\n",
"        return histogram\n",
"\n",
"    # Two FOR loops\n",
"    for div_y in range(division[0]):\n",
"        # Row bounds of the current division (computed once per row of cells)\n",
"        y_start = (div_y * height) // division[0]\n",
"        y_end = ((div_y + 1) * height) // division[0]\n",
"        row_mask = (locations[:, 1] >= y_start) & (locations[:, 1] < y_end)\n",
"\n",
"        for div_x in range(division[1]):\n",
"            # Column bounds of the current division\n",
"            x_start = (div_x * width) // division[1]\n",
"            x_end = ((div_x + 1) * width) // division[1]\n",
"\n",
"            # Find features within the current division\n",
"            div_mask = row_mask & (locations[:, 0] >= x_start) & (locations[:, 0] < x_end)\n",
"\n",
"            # Count each cluster id inside the division and store the counts in its slot\n",
"            offset = (div_y * division[1] + div_x) * num_clusters\n",
"            histogram[offset:offset + num_clusters] = np.bincount(\n",
"                clusters[div_mask], minlength=num_clusters\n",
"            )\n",
"\n",
"    return histogram"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0f64e74a",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Method 2 (One FOR loop)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "a597d45d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def m2_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"\n",
"    Create the BoVW spatial histogram with a single pass over the features.\n",
"\n",
"    Each feature is mapped straight to its division index. Along an axis of length L split\n",
"    into d parts, pixel p belongs to division ((p + 1) * d - 1) // L, which is the exact\n",
"    integer form of ceil((p + 1) / (L / d)) - 1 and therefore free of floating-point\n",
"    rounding at the division boundaries.\n",
"\n",
"    Parameters:\n",
"        im (numpy.ndarray): Image array; only its first two dimensions (height, width) are used.\n",
"        locations (numpy.ndarray): Array of shape (N, 2) with integer Cartesian coordinates (x, y).\n",
"        clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id (non-negative).\n",
"        division (list): List of integers of length 2 indicating division along Y and X axes.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: 1-dimensional int64 array of length\n",
"            division[0] * division[1] * (max cluster id + 1), ordered division by division\n",
"            (Y-major, then X) with one count per cluster id inside each division.\n",
"    \"\"\"\n",
"    img_shape = np.shape(im)\n",
"    height = int(img_shape[0])\n",
"    width = int(img_shape[1])\n",
"\n",
"    ## Possible Mistakes: Some students swapped x and y\n",
"    div_x = division[1]\n",
"    div_y = division[0]\n",
"    num_divisions = div_y * div_x\n",
"\n",
"    clusters = np.asarray(clusters)\n",
"    # An empty feature set yields an empty histogram instead of failing on max()\n",
"    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
"\n",
"    # Integer counts from the start, so no float round-trip is needed on return\n",
"    histogram = np.zeros(num_clusters * num_divisions, dtype=np.int64)\n",
"\n",
"    # One FOR loop\n",
"    for i in range(len(locations)):\n",
"        x = int(locations[i][0])\n",
"        y = int(locations[i][1])\n",
"\n",
"        # A point outside the image belongs to no division; counting it would corrupt\n",
"        # a neighbouring bin or index past the end of the histogram\n",
"        if not (0 <= x < width and 0 <= y < height):\n",
"            continue\n",
"\n",
"        x_div = ((x + 1) * div_x - 1) // width\n",
"        y_div = ((y + 1) * div_y - 1) // height\n",
"\n",
"        # Possible Mistakes: Some students miscalculated the boundary condition by using\n",
"        # ceil(point / size) - 1, which sends coordinate 0 to division -1\n",
"\n",
"        # Calculate the array position\n",
"        div = x_div + (y_div * div_x)\n",
"        array_pos = (div * num_clusters) + int(clusters[i])\n",
"\n",
"        # Update the histogram\n",
"        histogram[array_pos] += 1\n",
"\n",
"    return histogram"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "36f2fe5e",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Put students' implementations here"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "3804d695",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Be careful, some students used a different function name (e.g. bowv rather than bovw)\n",
|
|
"def generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
"    \"\"\"Count cluster ids per grid cell of the image and concatenate the counts.\n",
"\n",
"    Parameters:\n",
"        im (numpy.ndarray): Image array; only im.shape[0] and im.shape[1] are used.\n",
"        locations: Sequence of (x, y) pairs, one per feature.\n",
"        clusters: Sequence of cluster ids, aligned with locations.\n",
"        division (list): Number of cells along Y and along X.\n",
"\n",
"    Returns:\n",
"        numpy.ndarray: 1-dimensional int64 array holding, cell by cell (Y-major), one\n",
"            count per cluster id; its length is the number of distinct ids times the\n",
"            number of cells.\n",
"    \"\"\"\n",
"    rows, cols = division[0], division[1]\n",
"\n",
"    # Vocabulary size is taken as the number of distinct cluster ids\n",
"    vocab_size = np.unique(clusters).size\n",
"    counts = np.zeros(vocab_size * np.prod(division), dtype=np.int64)\n",
"\n",
"    cell_height = im.shape[0] // rows\n",
"    cell_width = im.shape[1] // cols\n",
"\n",
"    for row in range(rows):\n",
"        top = row * cell_height\n",
"        # The last row of cells is stretched to the bottom edge of the image\n",
"        bottom = im.shape[0] if row == rows - 1 else top + cell_height\n",
"\n",
"        for col in range(cols):\n",
"            left = col * cell_width\n",
"            # The last column of cells is stretched to the right edge of the image\n",
"            right = im.shape[1] if col == cols - 1 else left + cell_width\n",
"\n",
"            # First histogram slot belonging to this cell\n",
"            offset = (row * cols + col) * vocab_size\n",
"\n",
"            for (x, y), word in zip(locations, clusters):\n",
"                if not (top <= y < bottom and left <= x < right):\n",
"                    continue\n",
"                counts[offset + word] += 1\n",
"\n",
"    return counts"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0d516e6a",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Test (Should output ALL PASS)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a269b35f",
|
|
"metadata": {},
|
|
"source": [
|
|
"Restart and Run ALL for each submission"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "95d72a5c",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Testing division: [1, 1]\n",
|
|
"PASS: Book\n",
|
|
"PASS: Mount\n",
|
|
"PASS: Notre\n",
|
|
"Testing division: [2, 2]\n",
|
|
"PASS: Book\n",
|
|
"PASS: Mount\n",
|
|
"PASS: Notre\n",
|
|
"Testing division: [2, 3]\n",
|
|
"PASS: Book\n",
|
|
"PASS: Mount\n",
|
|
"PASS: Notre\n",
|
|
"ALL PASS\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Display name -> (image, keypoint locations, cluster ids) for every test image\n",
"test_images = {\n",
"    'Book': (im_book, locations_book, clusters_book),\n",
"    'Mount': (im_mount, locations_mount, clusters_mount),\n",
"    'Notre': (im_notre, locations_notre, clusters_notre),\n",
"}\n",
"\n",
"histograms = []\n",
"for division in [[1, 1], [2, 2], [2, 3]]:\n",
"    print('Testing division:', division)\n",
"\n",
"    division_histograms = []\n",
"    for image_name, (image, image_locations, image_clusters) in test_images.items():\n",
"        m1_histogram = m1_generate_bovw_spatial_histogram(image, image_locations, image_clusters, division)\n",
"        m2_histogram = m2_generate_bovw_spatial_histogram(image, image_locations, image_clusters, division)\n",
"\n",
"        # Students' implementations\n",
"        histogram = generate_bovw_spatial_histogram(image, image_locations, image_clusters, division)\n",
"\n",
"        # Histograms hold integer counts, so require exact, same-shape equality:\n",
"        # np.allclose would broadcast a wrongly shaped result and tolerate small count errors\n",
"        assert np.array_equal(m1_histogram, m2_histogram), (\n",
"            f'FAIL: {image_name}: the two reference methods disagree for division {division}'\n",
"        )\n",
"        assert np.array_equal(m1_histogram, histogram), (\n",
"            f'FAIL: {image_name}: submission differs from the reference for division {division}'\n",
"        )\n",
"        print(f'PASS: {image_name}')\n",
"\n",
"        division_histograms.append(m1_histogram)\n",
"\n",
"    # One [book, mount, notre] list of reference histograms per division\n",
"    histograms.append(division_histograms)\n",
"print(\"ALL PASS\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "00a11df8",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Save Output"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "ea1fadb1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"D:\\Anaconda3\\envs\\what\\lib\\site-packages\\numpy\\lib\\npyio.py:521: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
|
|
" arr = np.asanyarray(arr)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# The histograms have a different length for each division, so the nested list is ragged.\n",
"# Build the object array explicitly: implicit ragged conversion is deprecated in NumPy and\n",
"# rejected by newer releases. Reload with np.load(..., allow_pickle=True).\n",
"np.save('data/question_3_histogram.npy', np.array(histograms, dtype=object))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ddc3036c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "what",
|
|
"language": "python",
|
|
"name": "what"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|