Compare commits

...

2 Commits

Author SHA1 Message Date
wuhanstudio 8c78f23f98 [Question 4-6] Add a good model 2024-03-11 18:32:32 +00:00
wuhanstudio 69b809f56b [Question 1] Import libraries 2024-03-11 18:28:11 +00:00
3 changed files with 289 additions and 66 deletions

View File

@ -113,7 +113,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.image.AxesImage at 0x1f7c0c56340>" "<matplotlib.image.AxesImage at 0x1a0a1a77340>"
] ]
}, },
"execution_count": 6, "execution_count": 6,
@ -334,7 +334,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from scipy import ndimage" "from scipy import ndimage\n",
"from scipy.ndimage import convolve"
] ]
}, },
{ {
@ -539,30 +540,68 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def compute_gradient_magnitude(gr_im, kx, ky):\n", "def compute_gradient_magnitude(gr_im, kx, ky):\n",
" # Ensure the image is a float64 for computation\n", " \"\"\"\n",
" gr_im_float64 = gr_im.astype(np.float64)\n", " Compute gradient magnitude of a grey image with given kernels.\n",
"\n", "\n",
" # Compute gradients in x and y direction\n", " Parameters:\n",
" grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n", " - gr_im: 2D numpy array, input grey image.\n",
" grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n", " - kx: 2D numpy array, horizontal kernel.\n",
" - ky: 2D numpy array, vertical kernel.\n",
"\n", "\n",
" # Compute gradient magnitude\n", " Returns:\n",
" magnitude = np.sqrt(grad_x**2 + grad_y**2)\n", " - grad_mag: 2D numpy array, gradient magnitude.\n",
" \"\"\"\n",
" # Validate input gr_im\n",
" if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
" raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
" # Convert inputs to float64 for computation\n",
" gr_im = gr_im.astype(np.float64)\n",
" kx = kx.astype(np.float64)\n",
" ky = ky.astype(np.float64)\n",
" \n", " \n",
" return magnitude\n", " # Compute horizontal and vertical gradients using convolution\n",
" grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
" grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
" \n",
" # Compute gradient magnitude\n",
" grad_mag = np.sqrt(grad_x**2 + grad_y**2)\n",
" \n",
" print(\"Gradient Magnitude Array:\")\n",
" print(grad_mag)\n",
" \n",
" return grad_mag.astype(np.float64)\n",
"\n", "\n",
"def compute_gradient_direction(gr_im, kx, ky):\n", "def compute_gradient_direction(gr_im, kx, ky):\n",
" # Ensure the image is a float64 for computation\n", " \"\"\"\n",
" gr_im_float64 = gr_im.astype(np.float64)\n", " Compute gradient direction of a grey image with given kernels.\n",
" \n",
" # Compute gradients in x and y direction\n",
" grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n",
" grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n",
"\n", "\n",
" # Compute gradient direction\n", " Parameters:\n",
" direction = np.arctan2(grad_y, grad_x)\n", " - gr_im: 2D numpy array, input grey image.\n",
" - kx: 2D numpy array, horizontal kernel.\n",
" - ky: 2D numpy array, vertical kernel.\n",
"\n",
" Returns:\n",
" - grad_dir: 2D numpy array, gradient direction.\n",
" \"\"\"\n",
" # Validate input gr_im\n",
" if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
" raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
" # Convert inputs to float64 for computation\n",
" gr_im = gr_im.astype(np.float64)\n",
" kx = kx.astype(np.float64)\n",
" ky = ky.astype(np.float64)\n",
" \n", " \n",
" return direction" " # Compute horizontal and vertical gradients using convolution\n",
" grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
" grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
" \n",
" # Compute gradient direction\n",
" grad_dir = np.arctan2(grad_y, grad_x)\n",
" \n",
" print(\"Gradient Direction Array:\")\n",
" print(grad_dir)\n",
" \n",
" return grad_dir.astype(np.float64)\n"
] ]
}, },
{ {
@ -582,15 +621,38 @@
"execution_count": 23, "execution_count": 23,
"id": "63663950", "id": "63663950",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gradient Magnitude Array:\n",
"[[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Gradient Direction Array:\n",
"[[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n"
]
}
],
"source": [ "source": [
"# For convolution\n", "# For convolution\n",
"# magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n", "magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n",
"# direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n", "direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n",
"\n", "\n",
"# For Cross-Correlation\n", "# For Cross-Correlation\n",
"magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n", "# magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n",
"direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)" "# direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)"
] ]
}, },
{ {
@ -628,25 +690,46 @@
} }
], ],
"source": [ "source": [
"assert np.allclose(m1_magnitude, magnitude), np.allclose(m1_direction, direction)\n", "all_pass = 0\n",
"print (\"PASS: Method 1\")\n",
"\n", "\n",
"assert np.allclose(m2_magnitude, magnitude), np.allclose(m2_direction, direction)\n", "try:\n",
"print (\"PASS: Method 2\")\n", " assert np.allclose(m1_magnitude, magnitude) and np.allclose(m1_direction, direction)\n",
" print (\"PASS: Method 1\")\n",
" all_pass = all_pass + 1\n",
"except AssertionError as e:\n",
" print(\"Fail: Method 1\", e)\n",
"\n", "\n",
"assert np.allclose(m3_magnitude, magnitude), np.allclose(m3_direction, direction)\n", "try:\n",
"print (\"PASS: Method 3\")\n", " assert np.allclose(m2_magnitude, magnitude) and np.allclose(m2_direction, direction)\n",
" print (\"PASS: Method 2\")\n",
" all_pass = all_pass + 1\n",
"except AssertionError as e:\n",
" print(\"Fail: Method 2\", e)\n",
"\n", "\n",
"assert np.allclose(m4_magnitude, magnitude), np.allclose(m4_direction, direction)\n", "try:\n",
"print (\"PASS: Method 4\")\n", " assert np.allclose(m3_magnitude, magnitude) and np.allclose(m3_direction, direction)\n",
" print (\"PASS: Method 3\")\n",
" all_pass = all_pass + 1\n",
"except AssertionError as e:\n",
" print(\"Fail: Method 3\", e)\n",
"\n", "\n",
"print (\"ALL PASS\")" "try:\n",
" assert np.allclose(m4_magnitude, magnitude) and np.allclose(m4_direction, direction)\n",
" print (\"PASS: Method 4\")\n",
" all_pass = all_pass + 1\n",
"except AssertionError as e:\n",
" print(\"Fail: Method 4\", e)\n",
"\n",
"if all_pass == 4:\n",
" print (\"ALL PASS\")\n",
"else:\n",
" print(f\"{all_pass} Passed, {4 - all_pass} Failed\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "fb616c86", "id": "cc4a6cdb",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []

View File

@ -16,6 +16,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import torch\n", "import torch\n",
"import torch.nn.functional as F\n",
"from ca_utils import ResNet, BasicBlock" "from ca_utils import ResNet, BasicBlock"
] ]
}, },
@ -96,7 +97,8 @@
} }
], ],
"source": [ "source": [
"checkpoint = torch.load(\"data/weights_resnet.pth\")\n", "checkpoint = torch.load(\"data/weights_resnet.pth\", map_location=torch.device('cpu'))\n",
"\n",
"model.load_state_dict(checkpoint)\n", "model.load_state_dict(checkpoint)\n",
"model.eval()" "model.eval()"
] ]
@ -118,7 +120,9 @@
"source": [ "source": [
"import torchvision\n", "import torchvision\n",
"from torch.utils.data import DataLoader\n", "from torch.utils.data import DataLoader\n",
"from torchvision import transforms" "from torchvision import transforms\n",
"\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
] ]
}, },
{ {
@ -163,23 +167,32 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def m1_test_cnn(model, test_loader):\n", "def m1_test_cnn(model, test_loader):\n",
" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "\n",
" model.to(device)\n", " model.to(device)\n",
" model.eval()\n", " model.eval()\n",
"\n",
" correct = 0\n", " correct = 0\n",
" total = 0\n", " total = 0\n",
" all_predicted_labels = []\n", " all_predicted_labels = []\n",
"\n", "\n",
" with torch.no_grad():\n", " with torch.no_grad():\n",
" for images, labels in test_loader:\n", " for images, labels in test_loader:\n",
"\n",
" # Make predictions\n",
" images, labels = images.to(device), labels.to(device)\n", " images, labels = images.to(device), labels.to(device)\n",
" outputs = model(images)\n", " outputs = model(images)\n",
"\n",
" _, predicted = torch.max(outputs.data, 1)\n", " _, predicted = torch.max(outputs.data, 1)\n",
"\n",
" # Save results\n",
" total += labels.size(0)\n", " total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n", " correct += (predicted == labels).sum().item()\n",
" \n",
" all_predicted_labels.append(predicted.cpu().numpy())\n", " all_predicted_labels.append(predicted.cpu().numpy())\n",
"\n",
" accuracy = 100 * correct / total\n", " accuracy = 100 * correct / total\n",
" all_predicted_labels = np.concatenate(all_predicted_labels)\n", " all_predicted_labels = np.concatenate(all_predicted_labels)\n",
"\n",
" return all_predicted_labels, accuracy" " return all_predicted_labels, accuracy"
] ]
}, },
@ -187,13 +200,15 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 8,
"id": "3fcb0a3a", "id": "3fcb0a3a",
"metadata": {}, "metadata": {
"scrolled": false
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Test Accuracy: 67.85%\n" "Test Accuracy: 75.8%\n"
] ]
} }
], ],
@ -218,9 +233,36 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def test_cnn(model, test_loader):\n", "def test_cnn(model, test_loader):\n",
" all_predicted_labels, accuracy = 0, 0\n", " \"\"\"\n",
" Test the trained ResNet model on the test dataset.\n",
"\n", "\n",
" return all_predicted_labels, accuracy" " Args:\n",
" model (nn.Module): The trained ResNet model.\n",
" test_loader (DataLoader): Data loader for the test data.\n",
" \n",
" Returns:\n",
" list: Predicted labels for every test sample.\n",
" float: Test accuracy as a percentage (0-100).\n",
" Note: true labels are collected internally but are not returned.\n",
" \"\"\"\n",
" model.eval()\n",
" correct = 0\n",
" total = 0\n",
" predicted_labels = []\n",
" true_labels = []\n",
"\n",
" with torch.no_grad():\n",
" for images, labels in test_loader:\n",
" outputs = model(images)\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" predicted_labels.extend(predicted.tolist())\n",
" true_labels.extend(labels.tolist())\n",
"\n",
" accuracy = correct / total\n",
"\n",
" return predicted_labels, accuracy*100"
] ]
}, },
{ {
@ -233,7 +275,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Test Accuracy: 0%\n" "Test Accuracy: 75.8%\n"
] ]
} }
], ],
@ -242,6 +284,53 @@
"print(f'Test Accuracy: {test_accuracy}%')" "print(f'Test Accuracy: {test_accuracy}%')"
] ]
}, },
{
"cell_type": "markdown",
"id": "17bc1948",
"metadata": {},
"source": [
"### Test (Should output ALL PASS)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "853c4db3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 75.8\n",
"Score 100%: 15.0\n",
"ALL PASS\n"
]
}
],
"source": [
"assert np.allclose(predicted_labels, m1_predicted_labels)\n",
"assert np.allclose(test_accuracy, m1_test_accuracy)\n",
"\n",
"print(\"Test accuracy: \", test_accuracy)\n",
"\n",
"if (test_accuracy >= 75):\n",
" print(\"Score 100%:\", 15 * 1.0)\n",
"elif (test_accuracy >= 70):\n",
" print(\"Score 90%:\", 15 * 0.90)\n",
"elif (test_accuracy >= 65):\n",
" print(\"Score 80%:\", 15 * 0.80)\n",
"elif (test_accuracy >= 60):\n",
" print(\"Score 70%:\", 15 * 0.70)\n",
"elif (test_accuracy >= 55):\n",
" print(\"Score 60%:\", 15 * 0.60)\n",
"elif (test_accuracy >= 50):\n",
" print(\"Score 50%:\", 15 * 0.50)\n",
"else:\n",
" print(\"Accuracy less than 50%\")\n",
"print(\"ALL PASS\")"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "cef7dc17", "id": "cef7dc17",
@ -252,13 +341,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"id": "0990f3b2", "id": "0990f3b2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"true_labels = []\n", "true_labels = []\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n", "\n",
"for images, labels in test_loader:\n", "for images, labels in test_loader:\n",
" images, labels = images.to(device), labels.to(device)\n", " images, labels = images.to(device), labels.to(device)\n",
@ -269,12 +357,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"id": "8da35032", "id": "8da35032",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def compute_confusion_matrix(true, predictions):\n", "def m1_compute_confusion_matrix(true, predictions):\n",
" unique_labels = np.unique(np.concatenate((true, predictions)))\n", " unique_labels = np.unique(np.concatenate((true, predictions)))\n",
"\n", "\n",
" confusion_mat = np.zeros((len(unique_labels), len(unique_labels)), dtype=np.int64)\n", " confusion_mat = np.zeros((len(unique_labels), len(unique_labels)), dtype=np.int64)\n",
@ -295,35 +383,87 @@
"execution_count": 14, "execution_count": 14,
"id": "16b6f9e7", "id": "16b6f9e7",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{ "source": [
"data": { "m1_confusion_matrix = m1_compute_confusion_matrix(true_labels, m1_predicted_labels)"
"text/plain": [
"array([[168, 2, 5, 0, 6, 0, 2, 0, 6, 11],\n",
" [ 13, 108, 2, 18, 29, 16, 1, 13, 0, 0],\n",
" [ 1, 1, 162, 0, 1, 0, 7, 0, 0, 28],\n",
" [ 0, 7, 2, 74, 46, 40, 2, 26, 1, 2],\n",
" [ 1, 2, 2, 6, 166, 9, 6, 8, 0, 0],\n",
" [ 1, 3, 0, 11, 37, 108, 27, 12, 1, 0],\n",
" [ 1, 0, 1, 0, 40, 30, 121, 4, 0, 3],\n",
" [ 1, 7, 1, 6, 32, 24, 2, 127, 0, 0],\n",
" [ 11, 1, 2, 2, 1, 1, 0, 0, 153, 29],\n",
" [ 4, 0, 13, 2, 5, 0, 1, 0, 5, 170]], dtype=int64)"
] ]
}, },
"execution_count": 14, {
"cell_type": "markdown",
"id": "608265af",
"metadata": {}, "metadata": {},
"output_type": "execute_result" "source": [
"### Put Students' implementations here"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "1dce952c",
"metadata": {},
"outputs": [],
"source": [
"def compute_confusion_matrix(true_labels, predicted_labels):\n",
"\n",
" # Ensure inputs are NumPy arrays\n",
" true_labels = np.array(true_labels)\n",
" predicted_labels = np.array(predicted_labels)\n",
"\n",
" # Determine the number of classes\n",
" num_classes = len(np.unique(true_labels))\n",
"\n",
" # Initialize the confusion matrix with zeros\n",
" cm = np.zeros((num_classes, num_classes))\n",
"\n",
" # Count occurrences of true-predicted label pairs\n",
" for i in range(len(true_labels)):\n",
" cm[true_labels[i]][predicted_labels[i]] += 1\n",
"\n",
" return cm"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "21917014",
"metadata": {},
"outputs": [],
"source": [
"confusion_matrix = compute_confusion_matrix(true_labels, m1_predicted_labels)"
]
},
{
"cell_type": "markdown",
"id": "935956b7",
"metadata": {},
"source": [
"### Test (Should output ALL PASS)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b77da2e8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ALL PASS\n"
]
} }
], ],
"source": [ "source": [
"compute_confusion_matrix(true_labels, m1_predicted_labels)" "assert np.allclose(confusion_matrix, m1_confusion_matrix)\n",
"\n",
"print(\"ALL PASS\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1dce952c", "id": "adc0a7c7",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []
@ -333,7 +473,7 @@
"kernelspec": { "kernelspec": {
"display_name": "what", "display_name": "what",
"language": "python", "language": "python",
"name": "python3" "name": "what"
}, },
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {

Binary file not shown.