ECMM426-Template/Question 2.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f96d2a83",
   "metadata": {},
   "source": [
    "## Question 2 (20 marks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6f53891a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d3bbb31a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of k-means clusters requested from get_clusters for every image\n",
    "n_clusters = 100"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28068e50",
   "metadata": {},
   "source": [
    "Read images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "87dd5c72",
   "metadata": {},
   "outputs": [],
   "source": [
    "im_book = cv2.imread('data/books.jpg', cv2.IMREAD_GRAYSCALE)\n",
    "im_mount = cv2.imread('data/mount_rushmore_1.jpg', cv2.IMREAD_GRAYSCALE)\n",
    "im_notre = cv2.imread('data/notre_dame_1.jpg', cv2.IMREAD_GRAYSCALE)\n",
    "\n",
    "# cv2.imread returns None (it does not raise) when a file is missing or unreadable,\n",
    "# so stop here with a clear message instead of failing obscurely in a later cell.\n",
    "assert im_book is not None, 'Could not read data/books.jpg'\n",
    "assert im_mount is not None, 'Could not read data/mount_rushmore_1.jpg'\n",
    "assert im_notre is not None, 'Could not read data/notre_dame_1.jpg'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79c57454",
   "metadata": {},
   "source": [
    "## Generate Clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2e571277",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_features(image):\n",
    "    \"\"\"\n",
    "    Detect SIFT keypoints in an image and compute their descriptors.\n",
    "\n",
    "    Parameters:\n",
    "    image (numpy.ndarray): Image array. A 2-D (H, W) grayscale array is given an\n",
    "        explicit channel axis before detection; an array that already has a\n",
    "        channel axis is passed to SIFT unchanged.\n",
    "\n",
    "    Returns:\n",
    "    tuple: (keypoints, descriptors) exactly as returned by sift.detectAndCompute.\n",
    "\n",
    "    Raises:\n",
    "    ValueError: If image is None, which is what cv2.imread returns when the file\n",
    "        could not be read.\n",
    "    \"\"\"\n",
    "    # cv2.imread reports a missing/unreadable file by returning None, so fail with\n",
    "    # a clear message instead of an opaque TypeError on the indexing below.\n",
    "    if image is None:\n",
    "        raise ValueError('image is None; check that the image file exists and is readable')\n",
    "\n",
    "    # (H, W) -> (H, W, 1); only when the channel axis is missing\n",
    "    if image.ndim == 2:\n",
    "        image = image[:, :, np.newaxis]\n",
    "\n",
    "    # Initialize a SIFT detector\n",
    "    sift = cv2.SIFT_create()\n",
    "\n",
    "    # Detect keypoints and compute descriptors\n",
    "    keypoints, descriptors = sift.detectAndCompute(image, None)\n",
    "\n",
    "    return keypoints, descriptors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "12d042c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import KMeans\n",
    "\n",
    "def get_clusters(keypoints, descriptors, n_clusters=100):\n",
    "    \"\"\"\n",
    "    Quantise descriptors into cluster ids with k-means.\n",
    "\n",
    "    Parameters:\n",
    "    keypoints: Sequence of keypoints; each must expose a `.pt` coordinate pair.\n",
    "    descriptors: Feature array the k-means model is fitted on.\n",
    "    n_clusters (int): Number of k-means clusters.\n",
    "\n",
    "    Returns:\n",
    "    tuple: (clusters, locations), where clusters is the cluster id predicted for\n",
    "        each descriptor and locations is an int64 array built from the keypoint\n",
    "        `.pt` values.\n",
    "    \"\"\"\n",
    "    # Fit the model with a fixed seed, then label the same descriptors with it\n",
    "    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(descriptors)\n",
    "    clusters = model.predict(descriptors)\n",
    "\n",
    "    # Keypoint positions as (x, y) pairs, stored as int64\n",
    "    points = [keypoint.pt for keypoint in keypoints]\n",
    "    locations = np.array(points, dtype=np.int64)\n",
    "\n",
    "    return clusters, locations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b04681a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run SIFT on each image, in the order book, mount, notre\n",
    "(kpts_book, des_book), (kpts_mount, des_mount), (kpts_notre, des_notre) = [\n",
    "    get_features(gray) for gray in (im_book, im_mount, im_notre)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "70d23c90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cluster each image's descriptors separately, in the order book, mount, notre\n",
    "sift_outputs = ((kpts_book, des_book), (kpts_mount, des_mount), (kpts_notre, des_notre))\n",
    "(clusters_book, locations_book), (clusters_mount, locations_mount), (clusters_notre, locations_notre) = [\n",
    "    get_clusters(kpts, des, n_clusters=n_clusters) for kpts, des in sift_outputs\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1518e19f",
   "metadata": {},
   "source": [
    "## Method 1 (Two FOR loops)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "04f3901b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def m1_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
    "    \"\"\"\n",
    "    Create bag of visual words representation of an image based on the division information.\n",
    "\n",
    "    The image is split into division[0] x division[1] cells. Cell sizes are the\n",
    "    floor-divided image size; the last row and last column of cells extend to the\n",
    "    image edge so keypoints in a remainder strip are still counted.\n",
    "\n",
    "    Parameters:\n",
    "    im (numpy.ndarray): Image array of data type uint8.\n",
    "    locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
    "    clusters (numpy.ndarray): Array of shape (N,) with quantised (non-negative integer) cluster id.\n",
    "    division (list): List of integers of length 2 indicating division along Y and X axes.\n",
    "\n",
    "    Returns:\n",
    "    numpy.ndarray: 1-dimensional int64 array of length\n",
    "        division[0] * division[1] * (max cluster id + 1), ordered cell by cell\n",
    "        (row-major) and, within a cell, by cluster id. Empty when clusters is empty.\n",
    "    \"\"\"\n",
    "    locations = np.asarray(locations)\n",
    "    clusters = np.asarray(clusters)\n",
    "    n_rows, n_cols = division[0], division[1]\n",
    "\n",
    "    # Size by the largest id, not the number of distinct ids, so a cluster id that\n",
    "    # never occurs still gets its slot and later ids stay aligned.\n",
    "    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
    "\n",
    "    # Initialize the histogram\n",
    "    histogram = np.zeros((n_rows * n_cols * num_clusters,), dtype=np.int64)\n",
    "    if num_clusters == 0:\n",
    "        return histogram\n",
    "\n",
    "    # Determine the size of each division\n",
    "    div_height = im.shape[0] // n_rows\n",
    "    div_width = im.shape[1] // n_cols\n",
    "\n",
    "    xs = locations[:, 0]\n",
    "    ys = locations[:, 1]\n",
    "\n",
    "    # Two FOR loops\n",
    "    for div_y in range(n_rows):\n",
    "        y_start = div_y * div_height\n",
    "        # The last row absorbs the rows left over by the floor division\n",
    "        y_end = (div_y + 1) * div_height if div_y < n_rows - 1 else im.shape[0]\n",
    "        in_rows = (ys >= y_start) & (ys < y_end)\n",
    "\n",
    "        for div_x in range(n_cols):\n",
    "            x_start = div_x * div_width\n",
    "            # The last column absorbs the columns left over by the floor division\n",
    "            x_end = (div_x + 1) * div_width if div_x < n_cols - 1 else im.shape[1]\n",
    "\n",
    "            # Find features within the current division\n",
    "            div_mask = in_rows & (xs >= x_start) & (xs < x_end)\n",
    "\n",
    "            # Count every cluster id of this division in one call\n",
    "            counts = np.bincount(clusters[div_mask], minlength=num_clusters)\n",
    "            offset = (div_y * n_cols + div_x) * num_clusters\n",
    "            histogram[offset:offset + num_clusters] = counts\n",
    "\n",
    "    return histogram"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f64e74a",
   "metadata": {},
   "source": [
    "## Method 2 (One FOR loop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a597d45d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def m2_generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
    "    \"\"\"\n",
    "    Create the BoVW spatial histogram with a single pass over the keypoints.\n",
    "\n",
    "    Each keypoint is mapped straight to its cell: a point with coordinate x falls in\n",
    "    column ceil((x + 1) * division[1] / width) - 1, and likewise for rows.\n",
    "\n",
    "    Parameters:\n",
    "    im (numpy.ndarray): Image array; only its first two dimensions (height, width) are used.\n",
    "    locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
    "    clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id.\n",
    "    division (list): List of integers of length 2 indicating division along Y and X axes.\n",
    "\n",
    "    Returns:\n",
    "    numpy.ndarray: 1-dimensional integer array of length\n",
    "        division[0] * division[1] * (max cluster id + 1), ordered cell by cell\n",
    "        (row-major) and, within a cell, by cluster id. Empty when clusters is empty.\n",
    "        Keypoints lying outside the image are not counted.\n",
    "    \"\"\"\n",
    "    img_shape = np.shape(im)\n",
    "\n",
    "    height = img_shape[0]\n",
    "    width = img_shape[1]\n",
    "\n",
    "    ## Possible Mistakes: Some students swapped x and y\n",
    "    div_x = division[1]\n",
    "    div_y = division[0]\n",
    "\n",
    "    num_divisions = div_x * div_y\n",
    "\n",
    "    clusters = np.asarray(clusters)\n",
    "    # np.max raises on an empty array, so treat 'no keypoints' as zero clusters\n",
    "    num_clusters = int(np.max(clusters)) + 1 if clusters.size else 0\n",
    "\n",
    "    histogram = np.zeros(num_clusters * num_divisions, dtype=np.int64)\n",
    "\n",
    "    # One FOR loop\n",
    "    for point, cluster in zip(locations, clusters):\n",
    "        x, y = point[0], point[1]\n",
    "\n",
    "        # A point outside the image would wrap around or be counted in another cell\n",
    "        if not (0 <= x < width and 0 <= y < height):\n",
    "            continue\n",
    "\n",
    "        # ceil(a / b) written as -((-a) // b): exact for integer coordinates, so a\n",
    "        # keypoint on the last row/column cannot be pushed into the next cell by\n",
    "        # floating-point rounding of width / div_x or height / div_y.\n",
    "        x_div = int(-((-(x + 1) * div_x) // width)) - 1\n",
    "        y_div = int(-((-(y + 1) * div_y) // height)) - 1\n",
    "\n",
    "        # Possible Mistakes: Some students miscalculated the boundary condition\n",
    "        # by using point[0] and point[1] here instead of point[0] + 1 and point[1] + 1\n",
    "\n",
    "        # Calculate the array position\n",
    "        div = x_div + (y_div * div_x)\n",
    "        array_pos = (div * num_clusters) + cluster\n",
    "\n",
    "        # Update the histogram\n",
    "        histogram[array_pos] += 1\n",
    "\n",
    "    return histogram.astype(int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36f2fe5e",
   "metadata": {},
   "source": [
    "## Put students' implementations here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3804d695",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Be careful, some students used a different function name (e.g. bowv rather than bovw)\n",
    "def generate_bovw_spatial_histogram(im, locations, clusters, division):\n",
    "    \"\"\"\n",
    "    Create the BoVW spatial histogram of an image split into division[0] x division[1] cells.\n",
    "\n",
    "    Parameters:\n",
    "    im (numpy.ndarray): Image array; only its first two dimensions (height, width) are used.\n",
    "    locations (numpy.ndarray): Array of shape (N, 2) with Cartesian coordinates (x, y).\n",
    "    clusters (numpy.ndarray): Array of shape (N,) with quantised cluster id.\n",
    "    division (list): List of integers of length 2 indicating division along Y and X axes.\n",
    "\n",
    "    Returns:\n",
    "    numpy.ndarray: 1-dimensional int64 array of length\n",
    "        division[0] * division[1] * (max cluster id + 1), ordered cell by cell\n",
    "        (row-major) and, within a cell, by cluster id.\n",
    "    \"\"\"\n",
    "    # Size by the largest cluster id rather than the number of distinct ids, so an\n",
    "    # id that never occurs keeps its slot and the largest id cannot index out of range.\n",
    "    clusters = np.asarray(clusters)\n",
    "    num_clusters = int(clusters.max()) + 1 if clusters.size else 0\n",
    "\n",
    "    # Initialize histogram\n",
    "    spatial_histogram = np.zeros(num_clusters * np.prod(division), dtype=np.int64)\n",
    "\n",
    "    div_size_y = im.shape[0] // division[0]\n",
    "    div_size_x = im.shape[1] // division[1]\n",
    "\n",
    "    for div_y in range(division[0]):\n",
    "        start_y = div_y * div_size_y\n",
    "        # The last row of cells extends to the bottom edge of the image\n",
    "        end_y = (div_y + 1) * div_size_y if div_y < division[0] - 1 else im.shape[0]\n",
    "        for div_x in range(division[1]):\n",
    "            start_x = div_x * div_size_x\n",
    "            # The last column of cells extends to the right edge of the image\n",
    "            end_x = (div_x + 1) * div_size_x if div_x < division[1] - 1 else im.shape[1]\n",
    "\n",
    "            # First histogram slot belonging to this cell\n",
    "            base = (div_y * division[1] + div_x) * num_clusters\n",
    "            for loc, cluster_id in zip(locations, clusters):\n",
    "                x, y = loc\n",
    "                if start_y <= y < end_y and start_x <= x < end_x:\n",
    "                    spatial_histogram[base + cluster_id] += 1\n",
    "    return spatial_histogram"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d516e6a",
   "metadata": {},
   "source": [
    "## Test (Should output ALL PASS)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a269b35f",
   "metadata": {},
   "source": [
    "Restart and Run ALL for each submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "95d72a5c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Testing division: [1, 1]\n",
      "PASS: Book\n",
      "PASS: Mount\n",
      "PASS: Notre\n",
      "Testing division: [2, 2]\n",
      "PASS: Book\n",
      "PASS: Mount\n",
      "PASS: Notre\n",
      "Testing division: [2, 3]\n",
      "PASS: Book\n",
      "PASS: Mount\n",
      "PASS: Notre\n",
      "ALL PASS\n"
     ]
    }
   ],
   "source": [
    "# (display name, image, keypoint locations, cluster ids) for each test image\n",
    "test_cases = [\n",
    "    ('Book', im_book, locations_book, clusters_book),\n",
    "    ('Mount', im_mount, locations_mount, clusters_mount),\n",
    "    ('Notre', im_notre, locations_notre, clusters_notre),\n",
    "]\n",
    "\n",
    "histograms = []\n",
    "for division in [[1, 1], [2, 2], [2, 3]]:\n",
    "    print('Testing division:', division)\n",
    "\n",
    "    division_histograms = []\n",
    "    for name, im, locations, clusters in test_cases:\n",
    "        m1_histogram = m1_generate_bovw_spatial_histogram(im, locations, clusters, division)\n",
    "        m2_histogram = m2_generate_bovw_spatial_histogram(im, locations, clusters, division)\n",
    "\n",
    "        # Students' implementations\n",
    "        histogram = generate_bovw_spatial_histogram(im, locations, clusters, division)\n",
    "\n",
    "        # array_equal returns False on a shape mismatch, where allclose would either\n",
    "        # raise a broadcasting error or silently broadcast a shorter array.\n",
    "        assert np.array_equal(m1_histogram, m2_histogram), \\\n",
    "            f'{name}: reference methods disagree for division {division}'\n",
    "        assert np.array_equal(m1_histogram, histogram), \\\n",
    "            f'{name}: submission differs from the reference for division {division}'\n",
    "        print(f'PASS: {name}')\n",
    "\n",
    "        division_histograms.append(m1_histogram)\n",
    "\n",
    "    histograms.append(division_histograms)\n",
    "print('ALL PASS')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00a11df8",
   "metadata": {},
   "source": [
    "## Save Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ea1fadb1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\envs\\what\\lib\\site-packages\\numpy\\lib\\npyio.py:521: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
      "  arr = np.asanyarray(arr)\n"
     ]
    }
   ],
   "source": [
    "# The histograms have a different length for each division, so the nested list is\n",
    "# ragged. Build the object array explicitly: relying on np.save to do it implicitly\n",
    "# is deprecated and raises an error on newer NumPy versions.\n",
    "np.save('data/question_3_histogram.npy', np.array(histograms, dtype=object))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddc3036c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "what",
   "language": "python",
   "name": "what"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}