{ "cells": [ { "cell_type": "markdown", "id": "f96d2a83", "metadata": {}, "source": [ "## Question 2 (20 marks)" ] }, { "cell_type": "code", "execution_count": 1, "id": "6f53891a", "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "d3bbb31a", "metadata": {}, "outputs": [], "source": [ "n_clusters = 100" ] }, { "cell_type": "markdown", "id": "28068e50", "metadata": {}, "source": [ "Read images" ] }, { "cell_type": "code", "execution_count": 3, "id": "87dd5c72", "metadata": {}, "outputs": [], "source": [ "im_book = cv2.imread('data/books.jpg', cv2.IMREAD_GRAYSCALE)\n", "im_mount = cv2.imread('data/mount_rushmore_1.jpg', cv2.IMREAD_GRAYSCALE)\n", "im_notre = cv2.imread('data/notre_dame_1.jpg', cv2.IMREAD_GRAYSCALE)" ] }, { "cell_type": "markdown", "id": "79c57454", "metadata": {}, "source": [ "## Generate Clusters" ] }, { "cell_type": "code", "execution_count": 4, "id": "2e571277", "metadata": {}, "outputs": [], "source": [ "def get_features(image):\n", " image = image[:, :, np.newaxis]\n", "\n", " # Initialize a SIFT detector\n", " sift = cv2.SIFT_create()\n", "\n", " # Detect keypoints and compute descriptors\n", " keypoints, descriptors = sift.detectAndCompute(image, None)\n", "\n", " return keypoints, descriptors" ] }, { "cell_type": "code", "execution_count": 5, "id": "12d042c8", "metadata": {}, "outputs": [], "source": [ "from sklearn.cluster import KMeans\n", "\n", "def get_clusters(keypoints, descriptors, n_clusters=100):\n", "\n", " # Perform k-means clustering\n", " kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)\n", " kmeans.fit(descriptors)\n", "\n", " # Assign descriptors to clusters\n", " clusters = kmeans.predict(descriptors)\n", "\n", " # Convert keypoints to locations (x, y coordinates)\n", " locations = np.array([kp.pt for kp in keypoints], dtype=np.int64)\n", " \n", " return clusters, locations" ] }, { "cell_type": "code", "execution_count": 6, "id": "b04681a0", "metadata": {}, "outputs": [], "source": [ "kpts_book, des_book = get_features(im_book)\n", "kpts_mount, des_mount = get_features(im_mount)\n", "kpts_notre, des_notre = get_features(im_notre)" ] }, { "cell_type": "code", "execution_count": 7, "id": "70d23c90", "metadata": {}, "outputs": [], "source": [ "clusters_book, locations_book = get_clusters(kpts_book, des_book, n_clusters=n_clusters)\n", "clusters_mount, locations_mount = get_clusters(kpts_mount, des_mount, n_clusters=n_clusters)\n", "clusters_notre, locations_notre = get_clusters(kpts_notre, des_notre, n_clusters=n_clusters)" ] }, { "cell_type": "markdown", "id": "1518e19f", "metadata": {}, "source": [ "## Method 1 (Two FOR loops)" ] }, { "cell_type": "code", "execution_count": 8, "id": "04f3901b", "metadata": {}, "outputs": [], "source": [ "def m1_generate_bovw_spatial_histogram(im, locations, clusters, division):\n", " \"\"\"\n", " Create bag of visual words representation of an image based on the division information.\n", " \n", " Parameters:\n", " im (numpy.ndarray): Image array of data type uint8.\n", " locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n", " clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id.\n", " division (list): List of integers of length 2 indicating division along Y and X axes.\n", " \n", " Returns:\n", " numpy.ndarray: 1-dimensional array representing the BoVW spatial histogram.\n", " \"\"\"\n", "\n", " # Determine the size of each division\n", " div_height = im.shape[0] // division[0]\n", " div_width = im.shape[1] // division[1]\n", " \n", " # Initialize the histogram\n", " num_clusters = np.unique(clusters).size\n", " histogram = np.zeros((division[0] * division[1] * num_clusters,), dtype=np.int64)\n", "\n", " # Two FOR loops\n", " for div_y in range(division[0]):\n", " for div_x in range(division[1]):\n", " # Define the bounds of the current division\n", " y_start = div_y * div_height\n", " y_end = (div_y + 1) * div_height\n", " x_start = div_x * div_width\n", " x_end = (div_x + 1) * div_width\n", "\n", " # Find features within the current division\n", " div_mask = (locations[:, 1] >= y_start) & (locations[:, 1] < y_end) & \\\n", " (locations[:, 0] >= x_start) & (locations[:, 0] < x_end)\n", " div_locations = locations[div_mask]\n", " div_clusters = clusters[div_mask]\n", "\n", " # Calculate the histogram for the current division\n", " for i in range(num_clusters):\n", " cluster_mask = (div_clusters == i)\n", " histogram[div_y * division[1] * num_clusters + div_x * num_clusters + i] = np.sum(cluster_mask)\n", " \n", " return histogram" ] }, { "cell_type": "markdown", "id": "0f64e74a", "metadata": {}, "source": [ "## Method 2 (One FOR loop)" ] }, { "cell_type": "code", "execution_count": 9, "id": "a597d45d", "metadata": {}, "outputs": [], "source": [ "def m2_generate_bovw_spatial_histogram(im, locations, clusters, division):\n", "\n", " img_shape = np.shape(im)\n", "\n", " height = img_shape[0]\n", " width = img_shape[1]\n", "\n", " ## Possible Mistakes: Some students swapped x and y\n", " div_x = division[1]\n", " div_y = division[0]\n", "\n", " x_size = width / div_x\n", " y_size = height / div_y\n", "\n", " num_divisions = division[0] * division[1]\n", "\n", " num_clusters = np.max(clusters) + 1\n", "\n", " histogram = np.zeros(num_clusters * num_divisions)\n", "\n", " # One FOR loop\n", " for i in range(len(locations)):\n", " point = locations[i]\n", " cluster = clusters[i]\n", "\n", " x_div = np.ceil((point[0] + 1) / x_size).astype(np.int64) - 1\n", " y_div = np.ceil((point[1] + 1) / y_size).astype(np.int64) - 1\n", "\n", " # Possible Mistakes: Some students miscalculated the boundary condition\n", " # x_div = np.ceil(point[0] / x_size).astype(np.int64) - 1\n", " # y_div = np.ceil(point[1] / y_size).astype(np.int64) - 1\n", "\n", " # Calculate the array position\n", " div = x_div + (y_div * div_x)\n", " array_pos = (div * num_clusters) + cluster\n", " \n", " # Update the histogram\n", " histogram[array_pos] = histogram[array_pos] + 1\n", "\n", " return histogram.astype(int)" ] }, { "cell_type": "markdown", "id": "36f2fe5e", "metadata": {}, "source": [ "## Put students' implementations here" ] }, { "cell_type": "code", "execution_count": 10, "id": "3804d695", "metadata": {}, "outputs": [], "source": [ "# Be careful, some students used a different function name (e.g. bowv rather than bovw)\n", "def generate_bovw_spatial_histogram(im, locations, clusters, division):\n", " # Determine the number of clusters\n", " num_clusters = np.unique(clusters).size\n", "\n", " # Initialize histogram\n", " spatial_histogram = np.zeros(num_clusters * np.prod(division), dtype=np.int64)\n", "\n", " div_size_y = im.shape[0] // division[0]\n", " div_size_x = im.shape[1] // division[1]\n", "\n", " for div_y in range(division[0]):\n", " for div_x in range(division[1]):\n", " start_y = div_y * div_size_y\n", " end_y = (div_y + 1) * div_size_y if div_y < division[0] - 1 else im.shape[0]\n", " start_x = div_x * div_size_x\n", " end_x = (div_x + 1) * div_size_x if div_x < division[1] - 1 else im.shape[1]\n", " for loc, cluster_id in zip(locations, clusters):\n", " x, y = loc\n", " if start_y <= y < end_y and start_x <= x < end_x:\n", " index = (div_y * division[1] + div_x) * num_clusters + cluster_id\n", " spatial_histogram[index] += 1\n", " return spatial_histogram" ] }, { "cell_type": "markdown", "id": "0d516e6a", "metadata": {}, "source": [ "## Test (Should output ALL PASS)" ] }, { "cell_type": "markdown", "id": "a269b35f", "metadata": {}, "source": [ "Restart and Run ALL for each submission" ] }, { "cell_type": "code", "execution_count": 11, "id": "95d72a5c", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing division: [1, 1]\n", "PASS: Book\n", "PASS: Mount\n", "PASS: Notre\n", "Testing division: [2, 2]\n", "PASS: Book\n", "PASS: Mount\n", "PASS: Notre\n", "Testing division: [2, 3]\n", "PASS: Book\n", "PASS: Mount\n", "PASS: Notre\n", "ALL PASS\n" ] } ], "source": [ "histograms = []\n", "for division in [ [1, 1], [2, 2], [2, 3] ]:\n", " print('Testing division:', division)\n", "\n", " m1_histogram_book = m1_generate_bovw_spatial_histogram(im_book, locations_book, clusters_book, division)\n", " m1_histogram_mount = m1_generate_bovw_spatial_histogram(im_mount, locations_mount, clusters_mount, division)\n", " m1_histogram_notre = m1_generate_bovw_spatial_histogram(im_notre, locations_notre, clusters_notre, division)\n", "\n", " m2_histogram_book = m2_generate_bovw_spatial_histogram(im_book, locations_book, clusters_book, division)\n", " m2_histogram_mount = m2_generate_bovw_spatial_histogram(im_mount, locations_mount, clusters_mount, division)\n", " m2_histogram_notre = m2_generate_bovw_spatial_histogram(im_notre, locations_notre, clusters_notre, division)\n", "\n", " # Students' implementations\n", " histogram_book = generate_bovw_spatial_histogram(im_book, locations_book, clusters_book, division)\n", " histogram_mount = generate_bovw_spatial_histogram(im_mount, locations_mount, clusters_mount, division)\n", " histogram_notre = generate_bovw_spatial_histogram(im_notre, locations_notre, clusters_notre, division)\n", " \n", " assert np.allclose(m1_histogram_book, m2_histogram_book)\n", " assert np.allclose(m1_histogram_book, histogram_book)\n", " print(\"PASS: Book\")\n", "\n", " assert np.allclose(m1_histogram_mount, m2_histogram_mount)\n", " assert np.allclose(m1_histogram_mount, histogram_mount)\n", " print(\"PASS: Mount\")\n", "\n", " assert np.allclose(m1_histogram_notre, m2_histogram_notre)\n", " assert np.allclose(m1_histogram_notre, histogram_notre)\n", " print(\"PASS: Notre\")\n", "\n", " histograms.append( [m1_histogram_book, m1_histogram_mount, m1_histogram_notre] )\n", "print(\"ALL PASS\")" ] }, { "cell_type": "markdown", "id": "00a11df8", "metadata": {}, "source": [ "## Save Output" ] }, { "cell_type": "code", "execution_count": 12, "id": "ea1fadb1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\Anaconda3\\envs\\what\\lib\\site-packages\\numpy\\lib\\npyio.py:521: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", " arr = np.asanyarray(arr)\n" ] } ], "source": [ "np.save('data/question_3_histogram.npy', histograms)" ] }, { "cell_type": "code", "execution_count": null, "id": "ddc3036c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "what", "language": "python", "name": "what" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }