[Question 4-6] Add a good model

[Question 1] Import libraries
2024-03-11 18:32:32 +00:00 · 2024-03-11 18:28:11 +00:00
3 changed files with 289 additions and 66 deletions
--- a/1.ipynb
+++ b/1.ipynb
@@ -113,7 +113,7 @@
    {
     "data": {
      "text/plain": [
-       "<matplotlib.image.AxesImage at 0x1f7c0c56340>"
+       "<matplotlib.image.AxesImage at 0x1a0a1a77340>"
      ]
     },
     "execution_count": 6,
@@ -334,7 +334,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from scipy import ndimage"
+    "from scipy import ndimage\n",
    "from scipy.ndimage import convolve"
   ]
  },
  {
@@ -539,30 +540,68 @@
   "outputs": [],
   "source": [
    "def compute_gradient_magnitude(gr_im, kx, ky):\n",
-    "    # Ensure the image is a float64 for computation\n",
+    "    \"\"\"\n",
-    "    gr_im_float64 = gr_im.astype(np.float64)\n",
+    "    Compute gradient magnitude of a grey image with given kernels.\n",
    "\n",
-    "    # Compute gradients in x and y direction\n",
+    "    Parameters:\n",
-    "    grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n",
+    "    - gr_im: 2D numpy array, input grey image.\n",
-    "    grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n",
+    "    - kx: 2D numpy array, horizontal kernel.\n",
    "    - ky: 2D numpy array, vertical kernel.\n",
    "\n",
-    "    # Compute gradient magnitude\n",
+    "    Returns:\n",
-    "    magnitude = np.sqrt(grad_x**2 + grad_y**2)\n",
+    "    - grad_mag: 2D numpy array, gradient magnitude.\n",
    "    \"\"\"\n",
    "    # Validate input gr_im\n",
    "    if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
    "        raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
    "    # Convert inputs to float64 for computation\n",
    "    gr_im = gr_im.astype(np.float64)\n",
    "    kx = kx.astype(np.float64)\n",
    "    ky = ky.astype(np.float64)\n",
    "    \n",
-    "    return magnitude\n",
+    "    # Compute horizontal and vertical gradients using convolution\n",
    "    grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
    "    grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
    "    \n",
    "    # Compute gradient magnitude\n",
    "    grad_mag = np.sqrt(grad_x**2 + grad_y**2)\n",
    "    \n",
    "    print(\"Gradient Magnitude Array:\")\n",
    "    print(grad_mag)\n",
    "    \n",
    "    return grad_mag.astype(np.float64)\n",
    "\n",
    "def compute_gradient_direction(gr_im, kx, ky):\n",
-    "    # Ensure the image is a float64 for computation\n",
+    "    \"\"\"\n",
-    "    gr_im_float64 = gr_im.astype(np.float64)\n",
+    "    Compute gradient direction of a grey image with given kernels.\n",
    "    \n",
    "    # Compute gradients in x and y direction\n",
    "    grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n",
    "    grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n",
    "\n",
-    "    # Compute gradient direction\n",
+    "    Parameters:\n",
-    "    direction = np.arctan2(grad_y, grad_x)\n",
+    "    - gr_im: 2D numpy array, input grey image.\n",
    "    - kx: 2D numpy array, horizontal kernel.\n",
    "    - ky: 2D numpy array, vertical kernel.\n",
    "\n",
    "    Returns:\n",
    "    - grad_dir: 2D numpy array, gradient direction.\n",
    "    \"\"\"\n",
    "    # Validate input gr_im\n",
    "    if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
    "        raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
    "    # Convert inputs to float64 for computation\n",
    "    gr_im = gr_im.astype(np.float64)\n",
    "    kx = kx.astype(np.float64)\n",
    "    ky = ky.astype(np.float64)\n",
    "    \n",
-    "    return direction"
+    "    # Compute horizontal and vertical gradients using convolution\n",
    "    grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
    "    grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
    "    \n",
    "    # Compute gradient direction\n",
    "    grad_dir = np.arctan2(grad_y, grad_x)\n",
    "    \n",
    "    print(\"Gradient Direction Array:\")\n",
    "    print(grad_dir)\n",
    "    \n",
    "    return grad_dir.astype(np.float64)\n"
   ]
  },
  {
@@ -582,15 +621,38 @@
   "execution_count": 23,
   "id": "63663950",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Gradient Magnitude Array:\n",
      "[[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "Gradient Direction Array:\n",
      "[[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "# For convolution\n",
-    "# magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n",
+    "magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n",
-    "# direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n",
+    "direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n",
    "\n",
    "# For Cross-Correlation\n",
-    "magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n",
+    "# magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n",
-    "direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)"
+    "# direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)"
   ]
  },
  {
@@ -628,25 +690,46 @@
    }
   ],
   "source": [
-    "assert np.allclose(m1_magnitude, magnitude), np.allclose(m1_direction, direction)\n",
+    "all_pass = 0\n",
    "print (\"PASS: Method 1\")\n",
    "\n",
-    "assert np.allclose(m2_magnitude, magnitude), np.allclose(m2_direction, direction)\n",
+    "try:\n",
-    "print (\"PASS: Method 2\")\n",
    "    # `assert a, b` uses b only as the failure message, so both checks are and-ed together\n",
+    "    assert np.allclose(m1_magnitude, magnitude) and np.allclose(m1_direction, direction)\n",
    "    print (\"PASS: Method 1\")\n",
    "    all_pass = all_pass + 1\n",
    "except AssertionError as e:\n",
    "    print(\"Fail: Method 1\", e)\n",
    "\n",
-    "assert np.allclose(m3_magnitude, magnitude), np.allclose(m3_direction, direction)\n",
+    "try:\n",
-    "print (\"PASS: Method 3\")\n",
    "    # `assert a, b` uses b only as the failure message, so both checks are and-ed together\n",
+    "    assert np.allclose(m2_magnitude, magnitude) and np.allclose(m2_direction, direction)\n",
    "    print (\"PASS: Method 2\")\n",
    "    all_pass = all_pass + 1\n",
    "except AssertionError as e:\n",
    "    print(\"Fail: Method 2\", e)\n",
    "\n",
-    "assert np.allclose(m4_magnitude, magnitude), np.allclose(m4_direction, direction)\n",
+    "try:\n",
-    "print (\"PASS: Method 4\")\n",
    "    # `assert a, b` uses b only as the failure message, so both checks are and-ed together\n",
+    "    assert np.allclose(m3_magnitude, magnitude) and np.allclose(m3_direction, direction)\n",
    "    print (\"PASS: Method 3\")\n",
    "    all_pass = all_pass + 1\n",
    "except AssertionError as e:\n",
    "    print(\"Fail: Method 3\", e)\n",
    "\n",
-    "print (\"ALL PASS\")"
+    "try:\n",
    "    # `assert a, b` uses b only as the failure message, so both checks are and-ed together\n",
    "    assert np.allclose(m4_magnitude, magnitude) and np.allclose(m4_direction, direction)\n",
    "    print (\"PASS: Method 4\")\n",
    "    all_pass = all_pass + 1\n",
    "except AssertionError as e:\n",
    "    print(\"Fail: Method 4\", e)\n",
    "\n",
    "if all_pass == 4:\n",
    "    print (\"ALL PASS\")\n",
    "else:\n",
    "    print(f\"{all_pass} Passed, {4 - all_pass} Failed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "fb616c86",
+   "id": "cc4a6cdb",
   "metadata": {},
   "outputs": [],
   "source": []
--- a/4-6.ipynb
+++ b/4-6.ipynb
@@ -16,6 +16,7 @@
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn.functional as F\n",
    "from ca_utils import ResNet, BasicBlock"
   ]
  },
@@ -96,7 +97,8 @@
    }
   ],
   "source": [
-    "checkpoint = torch.load(\"data/weights_resnet.pth\")\n",
+    "checkpoint = torch.load(\"data/weights_resnet.pth\", map_location=torch.device('cpu'))\n",
    "\n",
    "model.load_state_dict(checkpoint)\n",
    "model.eval()"
   ]
@@ -118,7 +120,9 @@
   "source": [
    "import torchvision\n",
    "from torch.utils.data import DataLoader\n",
-    "from torchvision import transforms"
+    "from torchvision import transforms\n",
    "\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
   ]
  },
  {
@@ -163,23 +167,32 @@
   "outputs": [],
   "source": [
    "def m1_test_cnn(model, test_loader):\n",
-    "    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "\n",
    "    model.to(device)\n",
    "    model.eval()\n",
    "\n",
    "    correct = 0\n",
    "    total = 0\n",
    "    all_predicted_labels = []\n",
    "\n",
    "    with torch.no_grad():\n",
    "        for images, labels in test_loader:\n",
    "\n",
    "            # Make predictions\n",
    "            images, labels = images.to(device), labels.to(device)\n",
    "            outputs = model(images)\n",
    "\n",
    "            _, predicted = torch.max(outputs.data, 1)\n",
    "\n",
    "            # Save results\n",
    "            total += labels.size(0)\n",
    "            correct += (predicted == labels).sum().item()\n",
    "            \n",
    "            all_predicted_labels.append(predicted.cpu().numpy())\n",
    "\n",
    "    accuracy = 100 * correct / total\n",
    "    all_predicted_labels = np.concatenate(all_predicted_labels)\n",
    "\n",
    "    return all_predicted_labels, accuracy"
   ]
  },
@@ -187,13 +200,15 @@
   "cell_type": "code",
   "execution_count": 8,
   "id": "3fcb0a3a",
-   "metadata": {},
+   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Test Accuracy: 67.85%\n"
+      "Test Accuracy: 75.8%\n"
     ]
    }
   ],
@@ -218,9 +233,36 @@
   "outputs": [],
   "source": [
    "def test_cnn(model, test_loader):\n",
-    "    all_predicted_labels, accuracy = 0, 0\n",
+    "    \"\"\"\n",
    "    Test the trained ResNet model on the test dataset.\n",
    "\n",
-    "    return all_predicted_labels, accuracy"
+    "    Args:\n",
    "        model (nn.Module): The trained ResNet model.\n",
    "        test_loader (DataLoader): Data loader for the test data.\n",
    "    \n",
    "    Returns:\n",
    "        list: Predicted labels, in the order the loader yields them.\n",
    "        float: Test accuracy as a percentage (0-100).\n",
    "    \"\"\"\n",
    "    model.eval()\n",
    "    # Run on whatever device the model already lives on, so batches coming\n",
    "    # from the loader do not clash with a model that was moved to CUDA.\n",
    "    device = next(model.parameters()).device\n",
    "    correct = 0\n",
    "    total = 0\n",
    "    predicted_labels = []\n",
    "\n",
    "    with torch.no_grad():\n",
    "        for images, labels in test_loader:\n",
    "            images, labels = images.to(device), labels.to(device)\n",
    "            outputs = model(images)\n",
    "            _, predicted = torch.max(outputs.data, 1)\n",
    "            total += labels.size(0)\n",
    "            correct += (predicted == labels).sum().item()\n",
    "            predicted_labels.extend(predicted.tolist())\n",
    "\n",
    "    accuracy = correct / total\n",
    "\n",
    "    return predicted_labels, accuracy*100"
   ]
  },
  {
@@ -233,7 +275,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Test Accuracy: 0%\n"
+      "Test Accuracy: 75.8%\n"
     ]
    }
   ],
@@ -242,6 +284,53 @@
    "print(f'Test Accuracy: {test_accuracy}%')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17bc1948",
   "metadata": {},
   "source": [
    "### Test (Should output ALL PASS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "853c4db3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test accuracy:  75.8\n",
      "Score 100%: 15.0\n",
      "ALL PASS\n"
     ]
    }
   ],
   "source": [
    "assert np.allclose(predicted_labels, m1_predicted_labels)\n",
    "assert np.allclose(test_accuracy, m1_test_accuracy)\n",
    "\n",
    "print(\"Test accuracy: \", test_accuracy)\n",
    "\n",
    "if (test_accuracy >= 75):\n",
    "    print(\"Score 100%:\", 15 * 1.0)\n",
    "elif (test_accuracy >= 70):\n",
    "    print(\"Score 90%:\", 15 * 0.90)\n",
    "elif (test_accuracy >= 65):\n",
    "    print(\"Score 80%:\", 15 * 0.80)\n",
    "elif (test_accuracy >= 60):\n",
    "    print(\"Score 70%:\", 15 * 0.70)\n",
    "elif (test_accuracy >= 55):\n",
    "    print(\"Score 60%:\", 15 * 0.60)\n",
    "elif (test_accuracy >= 50):\n",
    "    print(\"Score 50%:\", 15 * 0.50)\n",
    "else:\n",
    "    print(\"Accuracy less than 50%\")\n",
    "print(\"ALL PASS\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cef7dc17",
@@ -252,13 +341,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
   "id": "0990f3b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "true_labels = []\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "for images, labels in test_loader:\n",
    "    images, labels = images.to(device), labels.to(device)\n",
@@ -269,12 +357,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
   "id": "8da35032",
   "metadata": {},
   "outputs": [],
   "source": [
-    "def compute_confusion_matrix(true, predictions):\n",
+    "def m1_compute_confusion_matrix(true, predictions):\n",
    "    unique_labels = np.unique(np.concatenate((true, predictions)))\n",
    "\n",
    "    confusion_mat = np.zeros((len(unique_labels), len(unique_labels)), dtype=np.int64)\n",
@@ -295,35 +383,87 @@
   "execution_count": 14,
   "id": "16b6f9e7",
   "metadata": {},
-   "outputs": [
+   "outputs": [],
-    {
+   "source": [
-     "data": {
+    "m1_confusion_matrix = m1_compute_confusion_matrix(true_labels, m1_predicted_labels)"
      "text/plain": [
       "array([[168,   2,   5,   0,   6,   0,   2,   0,   6,  11],\n",
       "       [ 13, 108,   2,  18,  29,  16,   1,  13,   0,   0],\n",
       "       [  1,   1, 162,   0,   1,   0,   7,   0,   0,  28],\n",
       "       [  0,   7,   2,  74,  46,  40,   2,  26,   1,   2],\n",
       "       [  1,   2,   2,   6, 166,   9,   6,   8,   0,   0],\n",
       "       [  1,   3,   0,  11,  37, 108,  27,  12,   1,   0],\n",
       "       [  1,   0,   1,   0,  40,  30, 121,   4,   0,   3],\n",
       "       [  1,   7,   1,   6,  32,  24,   2, 127,   0,   0],\n",
       "       [ 11,   1,   2,   2,   1,   1,   0,   0, 153,  29],\n",
       "       [  4,   0,  13,   2,   5,   0,   1,   0,   5, 170]], dtype=int64)"
   ]
  },
-     "execution_count": 14,
+  {
   "cell_type": "markdown",
   "id": "608265af",
   "metadata": {},
-     "output_type": "execute_result"
+   "source": [
    "### Put Students' implementations here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "1dce952c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_confusion_matrix(true_labels, predicted_labels):\n",
    "    \"\"\"\n",
    "    Build a confusion matrix from true and predicted class labels.\n",
    "\n",
    "    Args:\n",
    "        true_labels (array-like): Ground-truth label of every sample.\n",
    "        predicted_labels (array-like): Predicted label of every sample.\n",
    "\n",
    "    Returns:\n",
    "        np.ndarray: Integer matrix where entry [i, j] counts the samples whose\n",
    "        true label is the i-th class and predicted label is the j-th class\n",
    "        (classes sorted in ascending order).\n",
    "    \"\"\"\n",
    "    true_labels = np.asarray(true_labels)\n",
    "    predicted_labels = np.asarray(predicted_labels)\n",
    "\n",
    "    # Take the classes from both arrays so a class that is only ever\n",
    "    # predicted (never true) still gets a row/column instead of an IndexError\n",
    "    classes = np.unique(np.concatenate((true_labels, predicted_labels)))\n",
    "\n",
    "    # Counts are integers; use an integer matrix rather than the float default\n",
    "    cm = np.zeros((len(classes), len(classes)), dtype=np.int64)\n",
    "\n",
    "    # Map each label to its position in the sorted class list\n",
    "    rows = np.searchsorted(classes, true_labels)\n",
    "    cols = np.searchsorted(classes, predicted_labels)\n",
    "\n",
    "    # add.at accumulates correctly when the same (row, col) pair repeats\n",
    "    np.add.at(cm, (rows, cols), 1)\n",
    "\n",
    "    return cm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "21917014",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the student implementation; calling the m1_ reference here would compare it with itself\n",
    "confusion_matrix = compute_confusion_matrix(true_labels, m1_predicted_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "935956b7",
   "metadata": {},
   "source": [
    "### Test (Should output ALL PASS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b77da2e8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ALL PASS\n"
     ]
    }
   ],
   "source": [
-    "compute_confusion_matrix(true_labels, m1_predicted_labels)"
+    "assert np.allclose(confusion_matrix, m1_confusion_matrix)\n",
    "\n",
    "print(\"ALL PASS\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "1dce952c",
+   "id": "adc0a7c7",
   "metadata": {},
   "outputs": [],
   "source": []
@@ -333,7 +473,7 @@
  "kernelspec": {
   "display_name": "what",
   "language": "python",
-   "name": "python3"
+   "name": "what"
  },
  "language_info": {
   "codemirror_mode": {
--- a/data/weights_resnet.pth
+++ b/data/weights_resnet.pth
Author	SHA1	Message	Date
wuhanstudio	8c78f23f98	[Question 4-6] Add a good model	2024-03-11 18:32:32 +00:00
wuhanstudio	69b809f56b	[Question 1] Import libraries	2024-03-11 18:28:11 +00:00