Compare commits
10 Commits
003c4d2647
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 71d1b26a6c | |||
| 74748ab987 | |||
| b174358c38 | |||
| d7e854fed4 | |||
| 0a8a2a8f86 | |||
| 4ee29e676b | |||
| 292a257ea3 | |||
| 5ae34629d2 | |||
| 8c78f23f98 | |||
| 69b809f56b |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@
|
||||
data/EXCV10
|
||||
data/MaskedFace
|
||||
__pycache__
|
||||
yolov5/
|
||||
129
Question 1.ipynb
129
Question 1.ipynb
@@ -113,7 +113,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.image.AxesImage at 0x1f7c0c56340>"
|
||||
"<matplotlib.image.AxesImage at 0x1a0a1a77340>"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -334,7 +334,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from scipy import ndimage"
|
||||
"from scipy import ndimage\n",
|
||||
"from scipy.ndimage import convolve"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -539,30 +540,68 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_gradient_magnitude(gr_im, kx, ky):\n",
|
||||
" # Ensure the image is a float64 for computation\n",
|
||||
" gr_im_float64 = gr_im.astype(np.float64)\n",
|
||||
" \"\"\"\n",
|
||||
" Compute gradient magnitude of a grey image with given kernels.\n",
|
||||
"\n",
|
||||
" # Compute gradients in x and y direction\n",
|
||||
" grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n",
|
||||
" grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n",
|
||||
" Parameters:\n",
|
||||
" - gr_im: 2D numpy array, input grey image.\n",
|
||||
" - kx: 2D numpy array, horizontal kernel.\n",
|
||||
" - ky: 2D numpy array, vertical kernel.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" - grad_mag: 2D numpy array, gradient magnitude.\n",
|
||||
" \"\"\"\n",
|
||||
" # Validate input gr_im\n",
|
||||
" if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
|
||||
" raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
|
||||
" # Convert inputs to float64 for computation\n",
|
||||
" gr_im = gr_im.astype(np.float64)\n",
|
||||
" kx = kx.astype(np.float64)\n",
|
||||
" ky = ky.astype(np.float64)\n",
|
||||
" \n",
|
||||
" # Compute horizontal and vertical gradients using convolution\n",
|
||||
" grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
|
||||
" grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
|
||||
" \n",
|
||||
" # Compute gradient magnitude\n",
|
||||
" magnitude = np.sqrt(grad_x**2 + grad_y**2)\n",
|
||||
" grad_mag = np.sqrt(grad_x**2 + grad_y**2)\n",
|
||||
" \n",
|
||||
" return magnitude\n",
|
||||
" print(\"Gradient Magnitude Array:\")\n",
|
||||
" print(grad_mag)\n",
|
||||
" \n",
|
||||
" return grad_mag.astype(np.float64)\n",
|
||||
"\n",
|
||||
"def compute_gradient_direction(gr_im, kx, ky):\n",
|
||||
" # Ensure the image is a float64 for computation\n",
|
||||
" gr_im_float64 = gr_im.astype(np.float64)\n",
|
||||
" \"\"\"\n",
|
||||
" Compute gradient direction of a grey image with given kernels.\n",
|
||||
"\n",
|
||||
" # Compute gradients in x and y direction\n",
|
||||
" grad_x = cv2.filter2D(gr_im_float64, -1, kx.astype(np.float64))\n",
|
||||
" grad_y = cv2.filter2D(gr_im_float64, -1, ky.astype(np.float64))\n",
|
||||
" Parameters:\n",
|
||||
" - gr_im: 2D numpy array, input grey image.\n",
|
||||
" - kx: 2D numpy array, horizontal kernel.\n",
|
||||
" - ky: 2D numpy array, vertical kernel.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" - grad_dir: 2D numpy array, gradient direction.\n",
|
||||
" \"\"\"\n",
|
||||
" # Validate input gr_im\n",
|
||||
" if not isinstance(gr_im, np.ndarray) or gr_im.dtype != np.uint8 or gr_im.ndim != 2:\n",
|
||||
" raise ValueError(\"gr_im must be a 2-dimensional numpy array of data type uint8\")\n",
|
||||
" # Convert inputs to float64 for computation\n",
|
||||
" gr_im = gr_im.astype(np.float64)\n",
|
||||
" kx = kx.astype(np.float64)\n",
|
||||
" ky = ky.astype(np.float64)\n",
|
||||
" \n",
|
||||
" # Compute horizontal and vertical gradients using convolution\n",
|
||||
" grad_x = convolve2d(gr_im, kx, mode='same', boundary='symm')\n",
|
||||
" grad_y = convolve2d(gr_im, ky, mode='same', boundary='symm')\n",
|
||||
" \n",
|
||||
" # Compute gradient direction\n",
|
||||
" direction = np.arctan2(grad_y, grad_x)\n",
|
||||
" grad_dir = np.arctan2(grad_y, grad_x)\n",
|
||||
" \n",
|
||||
" return direction"
|
||||
" print(\"Gradient Direction Array:\")\n",
|
||||
" print(grad_dir)\n",
|
||||
" \n",
|
||||
" return grad_dir.astype(np.float64)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -582,15 +621,38 @@
|
||||
"execution_count": 23,
|
||||
"id": "63663950",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Gradient Magnitude Array:\n",
|
||||
"[[0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" ...\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]]\n",
|
||||
"Gradient Direction Array:\n",
|
||||
"[[0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" ...\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]\n",
|
||||
" [0. 0. 0. ... 0. 0. 0.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# For convolution\n",
|
||||
"# magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n",
|
||||
"# direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n",
|
||||
"magnitude = compute_gradient_magnitude(gr_im, kx_conv, ky_conv)\n",
|
||||
"direction = compute_gradient_direction(gr_im, kx_conv, ky_conv)\n",
|
||||
"\n",
|
||||
"# For Cross-Correlation\n",
|
||||
"magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n",
|
||||
"direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)"
|
||||
"# magnitude = compute_gradient_magnitude(gr_im, kx_cross, ky_cross)\n",
|
||||
"# direction = compute_gradient_direction(gr_im, kx_cross, ky_cross)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -628,25 +690,46 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"all_pass = 0\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" assert np.allclose(m1_magnitude, magnitude), np.allclose(m1_direction, direction)\n",
|
||||
" print (\"PASS: Method 1\")\n",
|
||||
" all_pass = all_pass + 1\n",
|
||||
"except AssertionError as e:\n",
|
||||
" print(\"Fail: Method 1\", e)\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" assert np.allclose(m2_magnitude, magnitude), np.allclose(m2_direction, direction)\n",
|
||||
" print (\"PASS: Method 2\")\n",
|
||||
" all_pass = all_pass + 1\n",
|
||||
"except AssertionError as e:\n",
|
||||
" print(\"Fail: Method 2\", e)\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" assert np.allclose(m3_magnitude, magnitude), np.allclose(m3_direction, direction)\n",
|
||||
" print (\"PASS: Method 3\")\n",
|
||||
" all_pass = all_pass + 1\n",
|
||||
"except AssertionError as e:\n",
|
||||
" print(\"Fail: Method 1\", e)\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" assert np.allclose(m4_magnitude, magnitude), np.allclose(m4_direction, direction)\n",
|
||||
" print (\"PASS: Method 4\")\n",
|
||||
" all_pass = all_pass + 1\n",
|
||||
"except AssertionError as e:\n",
|
||||
" print(\"Fail: Method 1\", e)\n",
|
||||
"\n",
|
||||
"print (\"ALL PASS\")"
|
||||
"if all_pass == 4:\n",
|
||||
" print (\"ALL PASS\")\n",
|
||||
"else:\n",
|
||||
" print(f\"{all_pass} Passed, {4 - all_pass} Failed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb616c86",
|
||||
"id": "cc4a6cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
||||
@@ -81,7 +81,7 @@
|
||||
" [0, 0, 1]], dtype=np.float64)\n",
|
||||
"\n",
|
||||
" # Combine the translation and rotation into a single transformation matrix\n",
|
||||
" rotation_matrix = np.dot(np.dot(translation_back, rotation), translation_to_origin)\n",
|
||||
" rotation_matrix = translation_back @ rotation @ translation_to_origin\n",
|
||||
" \n",
|
||||
" return rotation_matrix"
|
||||
]
|
||||
@@ -179,7 +179,7 @@
|
||||
"id": "06417b95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is one common mistake."
|
||||
"This is one common **mistake**."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"import torch.nn.functional as F\n",
|
||||
"from ca_utils import ResNet, BasicBlock"
|
||||
]
|
||||
},
|
||||
@@ -96,7 +97,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"checkpoint = torch.load(\"data/weights_resnet.pth\")\n",
|
||||
"checkpoint = torch.load(\"data/weights_resnet.pth\", map_location=torch.device('cpu'))\n",
|
||||
"\n",
|
||||
"model.load_state_dict(checkpoint)\n",
|
||||
"model.eval()"
|
||||
]
|
||||
@@ -118,7 +120,9 @@
|
||||
"source": [
|
||||
"import torchvision\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
"from torchvision import transforms"
|
||||
"from torchvision import transforms\n",
|
||||
"\n",
|
||||
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,6 +137,13 @@
|
||||
"image_transform = transforms.Compose(\n",
|
||||
" [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n",
|
||||
"\n",
|
||||
"# image_transform = transforms.Compose([\n",
|
||||
"# # transforms.Resize(256),\n",
|
||||
"# # transforms.CenterCrop(224),\n",
|
||||
"# transforms.ToTensor(),\n",
|
||||
"# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
|
||||
"# ])\n",
|
||||
"\n",
|
||||
"test_data = torchvision.datasets.ImageFolder('data/EXCV10/val/', transform=image_transform)\n",
|
||||
"test_loader = DataLoader(test_data, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)"
|
||||
]
|
||||
@@ -163,23 +174,32 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def m1_test_cnn(model, test_loader):\n",
|
||||
" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
||||
"\n",
|
||||
" model.to(device)\n",
|
||||
" model.eval()\n",
|
||||
"\n",
|
||||
" correct = 0\n",
|
||||
" total = 0\n",
|
||||
" all_predicted_labels = []\n",
|
||||
"\n",
|
||||
" with torch.no_grad():\n",
|
||||
" for images, labels in test_loader:\n",
|
||||
"\n",
|
||||
" # Make predictions\n",
|
||||
" images, labels = images.to(device), labels.to(device)\n",
|
||||
" outputs = model(images)\n",
|
||||
"\n",
|
||||
" _, predicted = torch.max(outputs.data, 1)\n",
|
||||
"\n",
|
||||
" # Save results\n",
|
||||
" total += labels.size(0)\n",
|
||||
" correct += (predicted == labels).sum().item()\n",
|
||||
" \n",
|
||||
" all_predicted_labels.append(predicted.cpu().numpy())\n",
|
||||
"\n",
|
||||
" accuracy = 100 * correct / total\n",
|
||||
" all_predicted_labels = np.concatenate(all_predicted_labels)\n",
|
||||
"\n",
|
||||
" return all_predicted_labels, accuracy"
|
||||
]
|
||||
},
|
||||
@@ -187,13 +207,15 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3fcb0a3a",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Test Accuracy: 67.85%\n"
|
||||
"Test Accuracy: 70.05%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -217,9 +239,24 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_cnn(model, test_loader):\n",
|
||||
" all_predicted_labels, accuracy = 0, 0\n",
|
||||
"def test_cnn(model, test_loader, device='cpu'):\n",
|
||||
" model.to(device)\n",
|
||||
" model.eval() \n",
|
||||
" total = 0\n",
|
||||
" correct_num = 0\n",
|
||||
" all_predicted_labels = []\n",
|
||||
"\n",
|
||||
" with torch.no_grad(): # No need to track gradients for testing\n",
|
||||
" for images, labels in test_loader:\n",
|
||||
" images, labels = images.to(device), labels.to(device)\n",
|
||||
" outputs = model(images)\n",
|
||||
" _, predicted = torch.max(outputs.data, 1)\n",
|
||||
" total += labels.size(0)\n",
|
||||
" correct_num += (predicted == labels).sum().item() \n",
|
||||
" all_predicted_labels.append(predicted.cpu().numpy())\n",
|
||||
"\n",
|
||||
" accuracy = (correct_num / total) * 100\n",
|
||||
" all_predicted_labels = np.concatenate(all_predicted_labels)\n",
|
||||
" return all_predicted_labels, accuracy"
|
||||
]
|
||||
},
|
||||
@@ -233,7 +270,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Test Accuracy: 0%\n"
|
||||
"Test Accuracy: 70.05%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -242,6 +279,53 @@
|
||||
"print(f'Test Accuracy: {test_accuracy}%')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "985e4f91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test (Should output ALL PASS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "694097e2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Test accuracy: 70.05\n",
|
||||
"Score 90%: 13.5\n",
|
||||
"ALL PASS\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"assert np.allclose(predicted_labels, m1_predicted_labels)\n",
|
||||
"assert np.allclose(test_accuracy, m1_test_accuracy)\n",
|
||||
"\n",
|
||||
"print(\"Test accuracy: \", test_accuracy)\n",
|
||||
"\n",
|
||||
"if (test_accuracy >= 75):\n",
|
||||
" print(\"Score 100%:\", 15 * 1.0)\n",
|
||||
"elif (test_accuracy >= 70):\n",
|
||||
" print(\"Score 90%:\", 15 * 0.90)\n",
|
||||
"elif (test_accuracy >= 65):\n",
|
||||
" print(\"Score 80%:\", 15 * 0.80)\n",
|
||||
"elif (test_accuracy >= 60):\n",
|
||||
" print(\"Score 70%:\", 15 * 0.70)\n",
|
||||
"elif (test_accuracy >= 55):\n",
|
||||
" print(\"Score 60%:\", 15 * 0.60)\n",
|
||||
"elif (test_accuracy >= 50):\n",
|
||||
" print(\"Score 50%:\", 15 * 0.50)\n",
|
||||
"else:\n",
|
||||
" print(\"Accuracy less than 50%\")\n",
|
||||
"print(\"ALL PASS\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cef7dc17",
|
||||
@@ -252,13 +336,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "0990f3b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"true_labels = []\n",
|
||||
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
||||
"\n",
|
||||
"for images, labels in test_loader:\n",
|
||||
" images, labels = images.to(device), labels.to(device)\n",
|
||||
@@ -269,12 +352,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "8da35032",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_confusion_matrix(true, predictions):\n",
|
||||
"def m1_compute_confusion_matrix(true, predictions):\n",
|
||||
" unique_labels = np.unique(np.concatenate((true, predictions)))\n",
|
||||
"\n",
|
||||
" confusion_mat = np.zeros((len(unique_labels), len(unique_labels)), dtype=np.int64)\n",
|
||||
@@ -295,35 +378,101 @@
|
||||
"execution_count": 14,
|
||||
"id": "16b6f9e7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[168, 2, 5, 0, 6, 0, 2, 0, 6, 11],\n",
|
||||
" [ 13, 108, 2, 18, 29, 16, 1, 13, 0, 0],\n",
|
||||
" [ 1, 1, 162, 0, 1, 0, 7, 0, 0, 28],\n",
|
||||
" [ 0, 7, 2, 74, 46, 40, 2, 26, 1, 2],\n",
|
||||
" [ 1, 2, 2, 6, 166, 9, 6, 8, 0, 0],\n",
|
||||
" [ 1, 3, 0, 11, 37, 108, 27, 12, 1, 0],\n",
|
||||
" [ 1, 0, 1, 0, 40, 30, 121, 4, 0, 3],\n",
|
||||
" [ 1, 7, 1, 6, 32, 24, 2, 127, 0, 0],\n",
|
||||
" [ 11, 1, 2, 2, 1, 1, 0, 0, 153, 29],\n",
|
||||
" [ 4, 0, 13, 2, 5, 0, 1, 0, 5, 170]], dtype=int64)"
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"m1_confusion_matrix = m1_compute_confusion_matrix(true_labels, m1_predicted_labels)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "60591999",
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import confusion_matrix\n",
|
||||
"\n",
|
||||
"def m2_compute_confusion_matrix(true, predictions):\n",
|
||||
" return confusion_matrix(true, predictions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "44b131f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"m2_confusion_matrix = m2_compute_confusion_matrix(true_labels, m1_predicted_labels)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd78bea6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Put Students' implementations here"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "1dce952c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_confusion_matrix(true, predictions):\n",
|
||||
" unique_labels = np.unique(np.concatenate((true, predictions)))\n",
|
||||
" confusion_matrix = np.zeros((len(unique_labels), len(unique_labels)), dtype=np.int64)\n",
|
||||
" for i, true_label in enumerate(unique_labels):\n",
|
||||
" for j, predicted_label in enumerate(unique_labels):\n",
|
||||
" confusion_matrix[i, j] = np.sum((true == true_label) & (predictions == predicted_label))\n",
|
||||
" return confusion_matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "1945d637",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"confusion_matrix = compute_confusion_matrix(true_labels, predicted_labels)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e1fd6eb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test (Should output ALL PASS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "6c87f7d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ALL PASS\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compute_confusion_matrix(true_labels, m1_predicted_labels)"
|
||||
"assert np.allclose(m1_confusion_matrix, m2_confusion_matrix)\n",
|
||||
"assert np.allclose(confusion_matrix, m1_confusion_matrix)\n",
|
||||
"\n",
|
||||
"print(\"ALL PASS\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1dce952c",
|
||||
"id": "d9bb6316",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -333,7 +482,7 @@
|
||||
"kernelspec": {
|
||||
"display_name": "what",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "what"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
842
Question 7 - FRCNN.ipynb
Normal file
842
Question 7 - FRCNN.ipynb
Normal file
File diff suppressed because one or more lines are too long
813
Question 7 - ResNet.ipynb
Normal file
813
Question 7 - ResNet.ipynb
Normal file
@@ -0,0 +1,813 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5457f0e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question 7 - ResNet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "868f9566",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import glob\n",
|
||||
"import torch\n",
|
||||
"import numpy as np\n",
|
||||
"from PIL import Image\n",
|
||||
"\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"from collections import Counter\n",
|
||||
"from xml.etree import ElementTree as ET\n",
|
||||
"\n",
|
||||
"from torchvision import transforms, models\n",
|
||||
"from torch.utils.data import Dataset, DataLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4ce3f8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "6e215553",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"val_labels = \"./data/MaskedFace/val/labels\"\n",
|
||||
"val_imgs = \"./data/MaskedFace/val/images\"\n",
|
||||
"\n",
|
||||
"y_true = glob.glob(os.path.join(val_labels,\"*.txt\"))\n",
|
||||
"images = glob.glob(os.path.join(val_imgs,\"*.png\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "94af35ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_dataset = {\n",
|
||||
" 'images': images, # list of image paths\n",
|
||||
" 'y_true': y_true, # list of label paths\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d1af863d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def count_obj(txt_file, n_class):\n",
|
||||
" with open(txt_file, 'r') as file:\n",
|
||||
" lines = file.readlines()\n",
|
||||
" # Extracting the class identifiers from each line\n",
|
||||
" class_ids = [int(line.split()[0]) for line in lines]\n",
|
||||
"\n",
|
||||
" # Counting the occurrences of each class\n",
|
||||
" class_counts = Counter(class_ids)\n",
|
||||
"\n",
|
||||
" # Sorting the dictionary by class id and converting it to a list of counts\n",
|
||||
" sorted_counts = [class_counts[i] if i in class_counts else 0 for i in range(n_class)]\n",
|
||||
" return sorted_counts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a9f5c65f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"85it [00:00, 96.70it/s] \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"gt_counts = []\n",
|
||||
"for idx , (img , txt) in enumerate(tqdm(zip(test_dataset['images'], test_dataset['y_true']))):\n",
|
||||
" # get ground truth\n",
|
||||
" obj_count = count_obj(txt, 3)\n",
|
||||
" gt_counts.append(obj_count)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71f5f968",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e70f6949",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Anaconda3\\envs\\what\\lib\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
|
||||
" warnings.warn(\n",
|
||||
"D:\\Anaconda3\\envs\\what\\lib\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.\n",
|
||||
" warnings.warn(msg)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ResNet(\n",
|
||||
" (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
|
||||
" (layer1): Sequential(\n",
|
||||
" (0): BasicBlock(\n",
|
||||
" (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" (1): BasicBlock(\n",
|
||||
" (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (layer2): Sequential(\n",
|
||||
" (0): BasicBlock(\n",
|
||||
" (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (downsample): Sequential(\n",
|
||||
" (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
|
||||
" (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (1): BasicBlock(\n",
|
||||
" (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (layer3): Sequential(\n",
|
||||
" (0): BasicBlock(\n",
|
||||
" (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (downsample): Sequential(\n",
|
||||
" (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
|
||||
" (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (1): BasicBlock(\n",
|
||||
" (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (layer4): Sequential(\n",
|
||||
" (0): BasicBlock(\n",
|
||||
" (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (downsample): Sequential(\n",
|
||||
" (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
|
||||
" (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (1): BasicBlock(\n",
|
||||
" (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" (relu): ReLU(inplace=True)\n",
|
||||
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
|
||||
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
|
||||
" (fc): Linear(in_features=512, out_features=3, bias=True)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class ImageDataset(Dataset):\n",
|
||||
" def __init__(self, directory, transformations=None):\n",
|
||||
" self.directory = directory\n",
|
||||
" self.transformations = transformations\n",
|
||||
" self.filenames = [file for file in os.listdir(directory) if file.endswith('.png')]\n",
|
||||
" self.labels_array = np.zeros((len(self.filenames), 3), dtype=np.int64)\n",
|
||||
"\n",
|
||||
" def __len__(self):\n",
|
||||
" return len(self.filenames)\n",
|
||||
"\n",
|
||||
" def __getitem__(self, index):\n",
|
||||
" file_path = os.path.join(self.directory, self.filenames[index])\n",
|
||||
" img = Image.open(file_path).convert('RGB')\n",
|
||||
" labels = self.extract_labels(file_path.replace('.png', '.xml'))\n",
|
||||
" \n",
|
||||
" if self.transformations:\n",
|
||||
" img = self.transformations(img)\n",
|
||||
" \n",
|
||||
" self.labels_array[index] = labels\n",
|
||||
" return img, torch.tensor(labels, dtype=torch.float32)\n",
|
||||
"\n",
|
||||
" def extract_labels(self, xml_path):\n",
|
||||
" xml_data = ET.parse(xml_path)\n",
|
||||
" categories = {'with_mask': 0, 'without_mask': 0, 'mask_weared_incorrect': 0}\n",
|
||||
" for item in xml_data.getroot().findall('object'):\n",
|
||||
" categories[item.find('name').text] += 1\n",
|
||||
" return list(categories.values())\n",
|
||||
"\n",
|
||||
"# Define image transformations\n",
|
||||
"image_transforms = {\n",
|
||||
" 'train': transforms.Compose([\n",
|
||||
" transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),\n",
|
||||
" transforms.RandomRotation(degrees=15),\n",
|
||||
" transforms.ColorJitter(),\n",
|
||||
" transforms.RandomHorizontalFlip(),\n",
|
||||
" transforms.CenterCrop(size=224),\n",
|
||||
" transforms.ToTensor(),\n",
|
||||
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
|
||||
" ]),\n",
|
||||
" 'val': transforms.Compose([\n",
|
||||
" transforms.Resize(size=256),\n",
|
||||
" transforms.CenterCrop(size=224),\n",
|
||||
" transforms.ToTensor(),\n",
|
||||
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
|
||||
" ])\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Initialize the pretrained ResNet18 model and modify the fully connected layer\n",
|
||||
"pretrained_model = models.resnet18(pretrained=True)\n",
|
||||
"pretrained_model.fc = torch.nn.Linear(pretrained_model.fc.in_features, 3)\n",
|
||||
"\n",
|
||||
"# Create the dataset and dataloaders\n",
|
||||
"training_data = ImageDataset('data/MaskedFace/train', transformations=image_transforms['train'])\n",
|
||||
"validation_data = ImageDataset('data/MaskedFace/val', transformations=image_transforms['val'])\n",
|
||||
"\n",
|
||||
"train_data_loader = DataLoader(training_data, batch_size=32, shuffle=True)\n",
|
||||
"validation_data_loader = DataLoader(validation_data, batch_size=32)\n",
|
||||
"\n",
|
||||
"# Setup device, loss function, optimizer, and learning rate scheduler\n",
|
||||
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
||||
"pretrained_model.to(device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "61ad7442",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:28<00:00, 6.76s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 1, Loss: 13.687, Validation Loss: 0.191\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:12<00:00, 6.02s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 2, Loss: 10.426, Validation Loss: 0.219\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:18<00:00, 6.28s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 3, Loss: 11.348, Validation Loss: 0.227\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:17<00:00, 6.26s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 4, Loss: 9.872, Validation Loss: 0.163\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:08<00:00, 5.85s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 5, Loss: 8.712, Validation Loss: 0.190\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:08<00:00, 5.84s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 6, Loss: 10.092, Validation Loss: 0.150\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:10<00:00, 5.94s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 7, Loss: 9.503, Validation Loss: 0.321\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:09<00:00, 5.88s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 8, Loss: 6.198, Validation Loss: 0.123\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:09<00:00, 5.87s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 9, Loss: 5.333, Validation Loss: 0.128\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [02:36<00:00, 7.11s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 10, Loss: 4.443, Validation Loss: 0.125\n",
|
||||
"Validation MAE: 12.31%\n",
|
||||
"[[1 2 0]\n",
|
||||
" [8 1 0]\n",
|
||||
" [3 0 1]\n",
|
||||
" ...\n",
|
||||
" [3 0 0]\n",
|
||||
" [1 0 0]\n",
|
||||
" [1 1 0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import copy\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"\n",
|
||||
"# Setup device, loss function, optimizer, and learning rate scheduler\n",
|
||||
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
||||
"pretrained_model.to(device)\n",
|
||||
"\n",
|
||||
"loss_function = torch.nn.MSELoss()\n",
|
||||
"optimizer = torch.optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)\n",
|
||||
"learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)\n",
|
||||
"\n",
|
||||
"# Function to evaluate the model's performance on validation data\n",
|
||||
"def evaluate_performance(model, loader):\n",
|
||||
" model.eval()\n",
|
||||
" total_error = 0.0\n",
|
||||
" for imgs, lbls in loader:\n",
|
||||
" imgs, lbls = imgs.to(device), lbls.to(device)\n",
|
||||
" with torch.no_grad():\n",
|
||||
" predictions = model(imgs)\n",
|
||||
" error = mean_absolute_error(lbls.cpu().detach().numpy(), predictions.cpu().detach().numpy(), multioutput='raw_values')\n",
|
||||
" total_error += np.sum(error)\n",
|
||||
" return total_error / len(loader.dataset)\n",
|
||||
"\n",
|
||||
"# Early stopping and model saving setup\n",
|
||||
"best_model_wts = copy.deepcopy(pretrained_model.state_dict())\n",
|
||||
"best_loss = float('inf')\n",
|
||||
"early_stopping_patience = 3\n",
|
||||
"patience_counter = 0\n",
|
||||
"\n",
|
||||
"# Training loop\n",
|
||||
"epochs = 10\n",
|
||||
"for epoch in range(epochs):\n",
|
||||
" pretrained_model.train()\n",
|
||||
" epoch_loss = 0.0\n",
|
||||
" for imgs, lbls in tqdm(train_data_loader):\n",
|
||||
" imgs, lbls = imgs.to(device), lbls.to(device)\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" predictions = pretrained_model(imgs)\n",
|
||||
" loss = loss_function(predictions, lbls)\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" epoch_loss += loss.item()\n",
|
||||
" \n",
|
||||
" learning_rate_scheduler.step()\n",
|
||||
" \n",
|
||||
" # Validation phase\n",
|
||||
" validation_loss = evaluate_performance(pretrained_model, validation_data_loader)\n",
|
||||
" print(f'Epoch {epoch+1}, Loss: {epoch_loss / len(train_data_loader):.3f}, Validation Loss: {validation_loss:.3f}')\n",
|
||||
" \n",
|
||||
"    # Save the weights whenever the validation loss improves (no early stop is triggered here)\n",
|
||||
" if validation_loss < best_loss:\n",
|
||||
" best_loss = validation_loss\n",
|
||||
" best_model_wts = copy.deepcopy(pretrained_model.state_dict())\n",
|
||||
" torch.save(pretrained_model.state_dict(), 'best_model.pth')\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"# Load the best model weights\n",
|
||||
"pretrained_model.load_state_dict(torch.load('best_model.pth'))\n",
|
||||
"\n",
|
||||
"# Final evaluation on the validation dataset\n",
|
||||
"validation_error = evaluate_performance(pretrained_model, validation_data_loader)\n",
|
||||
"print(f'Validation MAE: {validation_error * 100:.2f}%')\n",
|
||||
"\n",
|
||||
"# Print label counts from the training dataset\n",
|
||||
"print(training_data.labels_array)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab063eb1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluate on the test set"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "19729427",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Function to evaluate the model's performance on validation data\n",
|
||||
"def evaluate_performance(model, loader):\n",
|
||||
" model.eval()\n",
|
||||
" total_error = 0.0\n",
|
||||
" for imgs, lbls in loader:\n",
|
||||
" imgs, lbls = imgs.to(device), lbls.to(device)\n",
|
||||
" with torch.no_grad():\n",
|
||||
" predictions = model(imgs)\n",
|
||||
" error = mean_absolute_error(lbls.cpu().detach().numpy(), predictions.cpu().detach().numpy(), multioutput='raw_values')\n",
|
||||
" print(error)\n",
|
||||
" total_error += np.sum(error)\n",
|
||||
" return total_error / 3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d2b3f825",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[1.5965363 1.3042079 0.25560504]\n",
|
||||
"[1.8177493 1.5732876 0.45420742]\n",
|
||||
"[1.9562395 1.3338923 0.17067692]\n",
|
||||
"Validation MAE: 348.75%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load the best model weights\n",
|
||||
"pretrained_model.load_state_dict(torch.load('best_model.pth'))\n",
|
||||
"\n",
|
||||
"# Final evaluation on the validation dataset\n",
|
||||
"validation_error = evaluate_performance(pretrained_model, validation_data_loader)\n",
|
||||
"print(f'Validation MAE: {validation_error * 100:.2f}%')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e893f885",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 85/85 [00:11<00:00, 7.45it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"counts = []\n",
|
||||
"for idx , (img , lbls) in enumerate(tqdm(validation_data)):\n",
|
||||
" img, lbls = img.to(device), lbls.to(device)\n",
|
||||
" with torch.no_grad():\n",
|
||||
" predictions = pretrained_model(torch.unsqueeze(img, 0))[0]\n",
|
||||
" counts.append(predictions.detach().numpy())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "16f48e23",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[array([14.919903 , 1.9947946 , 0.65775687], dtype=float32),\n",
|
||||
" array([ 6.8552303 , -0.18041131, 0.07070862], dtype=float32),\n",
|
||||
" array([ 1.0139127 , 0.2854728 , -0.08013925], dtype=float32),\n",
|
||||
" array([4.438932 , 0.7808308 , 0.05505312], dtype=float32),\n",
|
||||
" array([7.2354264 , 3.4551375 , 0.30724907], dtype=float32),\n",
|
||||
" array([5.588563 , 0.6697209 , 0.17777884], dtype=float32),\n",
|
||||
" array([ 1.150365 , 0.6162016 , -0.10613517], dtype=float32),\n",
|
||||
" array([ 8.920831 , 0.6018489 , -0.06503136], dtype=float32),\n",
|
||||
" array([1.95457 , 0.17450362, 0.05267046], dtype=float32),\n",
|
||||
" array([2.5774434 , 0.9776695 , 0.18825674], dtype=float32),\n",
|
||||
" array([ 2.6093178 , 0.48708877, -0.17551954], dtype=float32),\n",
|
||||
" array([ 0.16392124, 0.5478727 , -0.19237904], dtype=float32),\n",
|
||||
" array([ 4.6288757 , 0.1531632 , -0.03170557], dtype=float32),\n",
|
||||
" array([ 1.8586371 , 0.6651823 , -0.02203152], dtype=float32),\n",
|
||||
" array([3.2771707 , 3.1532748 , 0.14557752], dtype=float32),\n",
|
||||
" array([3.8890243 , 2.2504125 , 0.05863352], dtype=float32),\n",
|
||||
" array([10.092557 , 0.9448385 , 0.26969808], dtype=float32),\n",
|
||||
" array([ 0.27157634, 0.17475206, -0.23231247], dtype=float32),\n",
|
||||
" array([ 2.3398385 , 0.6199454 , -0.06315048], dtype=float32),\n",
|
||||
" array([7.9481864 , 0.86970013, 0.3186779 ], dtype=float32),\n",
|
||||
" array([ 5.4592905e+00, 3.0020913e-01, -5.3105988e-03], dtype=float32),\n",
|
||||
" array([ 0.97144395, 0.82078457, -0.06586552], dtype=float32),\n",
|
||||
" array([ 1.3530452 , 0.5240793 , -0.06924771], dtype=float32),\n",
|
||||
" array([1.1931357 , 0.5295599 , 0.20559092], dtype=float32),\n",
|
||||
" array([5.624632 , 0.8383505 , 0.37541458], dtype=float32),\n",
|
||||
" array([ 0.78418005, 0.9187632 , -0.0636546 ], dtype=float32),\n",
|
||||
" array([10.465925 , 1.8872681 , 0.38873053], dtype=float32),\n",
|
||||
" array([ 4.920414 , 1.9515185 , -0.12888059], dtype=float32),\n",
|
||||
" array([1.518215 , 1.5924859 , 0.14355288], dtype=float32),\n",
|
||||
" array([6.9586325, 1.1489052, 0.2852966], dtype=float32),\n",
|
||||
" array([0.5843046 , 1.45111 , 0.00412361], dtype=float32),\n",
|
||||
" array([12.129912 , 1.7866051 , 0.31929207], dtype=float32),\n",
|
||||
" array([38.12094 , 6.549285 , 1.1005894], dtype=float32),\n",
|
||||
" array([ 1.2271879 , 0.2557486 , -0.22623575], dtype=float32),\n",
|
||||
" array([-0.06689173, 0.0394736 , 0.631119 ], dtype=float32),\n",
|
||||
" array([17.32966 , 2.792189 , 0.54758376], dtype=float32),\n",
|
||||
" array([3.3420715 , 0.09269053, 0.02531072], dtype=float32),\n",
|
||||
" array([1.5794499 , 0.42056152, 0.06615666], dtype=float32),\n",
|
||||
" array([20.351597 , 3.7114801, 0.7863975], dtype=float32),\n",
|
||||
" array([8.772988 , 0.9012797 , 0.20384854], dtype=float32),\n",
|
||||
" array([0.8031712 , 0.46975204, 0.10056265], dtype=float32),\n",
|
||||
" array([1.3446803 , 0.8946388 , 0.12165649], dtype=float32),\n",
|
||||
" array([ 0.32257232, -0.06660413, -0.22496015], dtype=float32),\n",
|
||||
" array([3.845796 , 0.8221053 , 0.03321841], dtype=float32),\n",
|
||||
" array([ 0.7769756 , 0.30658063, -0.3144942 ], dtype=float32),\n",
|
||||
" array([0.9002108 , 0.38418356, 0.25538492], dtype=float32),\n",
|
||||
" array([11.137635 , 1.4070593 , 0.46713832], dtype=float32),\n",
|
||||
" array([1.0896404 , 0.3867779 , 0.03269624], dtype=float32),\n",
|
||||
" array([-0.29543436, 0.58017415, -0.08616602], dtype=float32),\n",
|
||||
" array([4.886879 , 1.328992 , 0.08463573], dtype=float32),\n",
|
||||
" array([20.802843 , 2.5175433, 0.1205664], dtype=float32),\n",
|
||||
" array([4.472849 , 1.8497019 , 0.07973102], dtype=float32),\n",
|
||||
" array([3.800993 , 1.2847486 , 0.40869945], dtype=float32),\n",
|
||||
" array([ 3.2214005, 2.3649635, -0.05755 ], dtype=float32),\n",
|
||||
" array([6.194131 , 1.039898 , 0.19118609], dtype=float32),\n",
|
||||
" array([5.946366 , 1.9515687, 0.0739623], dtype=float32),\n",
|
||||
" array([ 1.548485 , -0.26474452, 0.13542093], dtype=float32),\n",
|
||||
" array([-0.12953067, 2.0475016 , 0.12173931], dtype=float32),\n",
|
||||
" array([ 3.2755911 , 2.0698051 , -0.03214201], dtype=float32),\n",
|
||||
" array([ 4.795667 , -0.3839026, -0.324237 ], dtype=float32),\n",
|
||||
" array([1.4601235 , 0.9413236 , 0.15387204], dtype=float32),\n",
|
||||
" array([0.60179263, 0.18167558, 0.06993645], dtype=float32),\n",
|
||||
" array([2.5860176 , 0.96621907, 0.1660994 ], dtype=float32),\n",
|
||||
" array([2.3293552 , 2.248715 , 0.05637825], dtype=float32),\n",
|
||||
" array([1.5858288 , 0.75048965, 0.5053718 ], dtype=float32),\n",
|
||||
" array([4.6874514 , 2.613487 , 0.02177998], dtype=float32),\n",
|
||||
" array([ 3.015262 , 1.2428983 , -0.06558037], dtype=float32),\n",
|
||||
" array([ 5.4304247 , 1.3663604 , -0.18734889], dtype=float32),\n",
|
||||
" array([1.169702 , 0.29014575, 0.07055575], dtype=float32),\n",
|
||||
" array([ 2.785139 , 1.7807665 , -0.14221995], dtype=float32),\n",
|
||||
" array([ 6.0665565e+00, -1.1839047e-03, -2.0407777e-01], dtype=float32),\n",
|
||||
" array([ 4.0390615 , 1.0952463 , -0.17736901], dtype=float32),\n",
|
||||
" array([ 2.0545983 , -1.0606133 , -0.20474596], dtype=float32),\n",
|
||||
" array([14.975636 , 2.6628957 , 0.41037458], dtype=float32),\n",
|
||||
" array([ 1.532108 , 1.0259324 , -0.02336033], dtype=float32),\n",
|
||||
" array([ 1.6325457 , 2.1987557 , -0.23485237], dtype=float32),\n",
|
||||
" array([ 0.9079408 , 0.1572775 , -0.20104134], dtype=float32),\n",
|
||||
" array([ 1.0071435 , 1.1668189 , -0.06868404], dtype=float32),\n",
|
||||
" array([ 1.153094 , 0.40935773, -0.05768288], dtype=float32),\n",
|
||||
" array([0.5880935 , 0.42007735, 0.12577775], dtype=float32),\n",
|
||||
" array([8.898152 , 0.9833183 , 0.27929026], dtype=float32),\n",
|
||||
" array([ 0.46698472, 0.8412469 , -0.2756693 ], dtype=float32),\n",
|
||||
" array([ 2.401714 , 1.1422199 , -0.04599947], dtype=float32),\n",
|
||||
" array([6.7554636 , 0.9809863 , 0.21429788], dtype=float32),\n",
|
||||
" array([ 2.7404675 , 0.83549696, -0.06813517], dtype=float32)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"counts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97afedd6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## MAPE"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2c935860",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_mape(prediction, truth):\n",
|
||||
" mape = np.mean( np.abs(truth - prediction) / np.maximum(truth, np.ones_like(truth)) ) * 100\n",
|
||||
" return mape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ea0405a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MAPE = compute_mape(np.array(counts), gt_counts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "bdda69e3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"68.38530732497205\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(MAPE)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d11e9ede",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Final Score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b7aaaaca",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Score: 0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"if MAPE <= 10:\n",
|
||||
" print(\"Score: \", 25*1.0)\n",
|
||||
"elif MAPE <= 15:\n",
|
||||
" print(\"Score: \", 25*0.875)\n",
|
||||
"elif MAPE <= 20:\n",
|
||||
" print(\"Score: \", 25*0.75)\n",
|
||||
"elif MAPE <= 25:\n",
|
||||
" print(\"Score: \", 25*0.625)\n",
|
||||
"elif MAPE <= 30:\n",
|
||||
" print(\"Score: \", 25*0.5)\n",
|
||||
"else:\n",
|
||||
" print(\"Score: \", 0) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bf0f953",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "what",
|
||||
"language": "python",
|
||||
"name": "what"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
907
Question 7 - YOLOv5.ipynb
Normal file
907
Question 7 - YOLOv5.ipynb
Normal file
File diff suppressed because one or more lines are too long
759
Question 7 - YOLOv8.ipynb
Normal file
759
Question 7 - YOLOv8.ipynb
Normal file
@@ -0,0 +1,759 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a619e638",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question 7 - YOLOv8"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cb626037",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import torch\n",
|
||||
"import numpy as np\n",
|
||||
"from collections import Counter\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"from ultralytics import YOLO\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import glob\n",
|
||||
"from tqdm import tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39beaeb6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"data/yolov8.pt\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78f8f8d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f920de25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"val_labels = \"./data/MaskedFace/val/labels\"\n",
|
||||
"val_imgs = \"./data/MaskedFace/val/images\"\n",
|
||||
"\n",
|
||||
"y_true = glob.glob(os.path.join(val_labels,\"*.txt\"))\n",
|
||||
"y_true.sort()\n",
|
||||
"\n",
|
||||
"images = glob.glob(os.path.join(val_imgs,\"*.png\"))\n",
|
||||
"images.sort()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "78f3faca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_dataset = {\n",
|
||||
" 'images': images, # list of image paths\n",
|
||||
" 'y_true': y_true, # list of label paths\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "dace1605",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def count_obj(txt_file, n_class):\n",
|
||||
" with open(txt_file, 'r') as file:\n",
|
||||
" lines = file.readlines()\n",
|
||||
" # Extracting the class identifiers from each line\n",
|
||||
" class_ids = [int(line.split()[0]) for line in lines]\n",
|
||||
"\n",
|
||||
" # Counting the occurrences of each class\n",
|
||||
" class_counts = Counter(class_ids)\n",
|
||||
"\n",
|
||||
" # Sorting the dictionary by class id and converting it to a list of counts\n",
|
||||
" sorted_counts = [class_counts[i] if i in class_counts else 0 for i in range(n_class)]\n",
|
||||
" return sorted_counts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "bfc50534",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"85it [00:00, 7354.03it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"gt_counts = []\n",
|
||||
"for idx , (img , txt) in enumerate(tqdm(zip(test_dataset['images'], test_dataset['y_true']))):\n",
|
||||
" # get ground truth\n",
|
||||
" obj_count = count_obj(txt, 3)\n",
|
||||
" gt_counts.append(obj_count)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "44602de6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e5ff04e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = YOLO(MODEL_NAME)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ea8aa59",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test on the validation set"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3d15ae87",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from collections import Counter\n",
|
||||
"\n",
|
||||
"def calculate_mape(actual, forecast):\n",
|
||||
" if len(actual) != len(forecast):\n",
|
||||
" raise ValueError(\"The length of actual and forecast arrays must be the same.\")\n",
|
||||
" \n",
|
||||
" n = len(actual)\n",
|
||||
" sum_error = 0\n",
|
||||
" \n",
|
||||
" for a, f in zip(actual, forecast):\n",
|
||||
" sum_error += abs(a - f) / max(a, 1)\n",
|
||||
" \n",
|
||||
" mape_value = (sum_error / n) * 100\n",
|
||||
" return mape_value\n",
|
||||
"\n",
|
||||
"def count_masks(model, dataset):\n",
|
||||
" n_class = 3\n",
|
||||
" mape_scores = []\n",
|
||||
" all_pred_counts = []\n",
|
||||
" all_obj_counts = []\n",
|
||||
" for idx , (img , txt) in enumerate(tqdm(zip(dataset['images'],dataset['y_true']))):\n",
|
||||
" # get predicted list\n",
|
||||
" preds = model.predict(img)\n",
|
||||
" pred = preds[0]\n",
|
||||
" predict_list = [ box.cls[0].item() for box in pred.boxes]\n",
|
||||
" count = Counter(predict_list)\n",
|
||||
" predict_count = [count[i] if i in count else 0 for i in range(n_class)]\n",
|
||||
" # get ground truth\n",
|
||||
" obj_count = count_obj(txt, n_class)\n",
|
||||
" all_obj_counts.append(obj_count)\n",
|
||||
" all_pred_counts.append(predict_count)\n",
|
||||
"\n",
|
||||
" '''\n",
|
||||
"    After the model was trained, I found that the class order I defined in data.yaml, [without_mask, with_mask, mask_weared_incorrect], is wrong.\n",
|
||||
"    Therefore, I will swap the true and predicted labels to the order [with_mask, without_mask, mask_weared_incorrect] in the count_masks function, so that the returned values respectively indicate the number of faces wearing a mask, not wearing a mask, and wearing a mask incorrectly.\n",
|
||||
"    I did not correct data.yaml and retrain the model because of limited time.\n",
|
||||
" '''\n",
|
||||
" all_pred_counts = np.array(all_pred_counts)\n",
|
||||
" all_obj_counts = np.array(all_obj_counts)\n",
|
||||
"\n",
|
||||
"# all_pred_counts[:, [0, 1]] = all_pred_counts[:, [1, 0]]\n",
|
||||
"# all_obj_counts[:, [0, 1]] = all_obj_counts[:, [1, 0]]\n",
|
||||
"\n",
|
||||
" mape_scores = [calculate_mape(a, p) for a, p in zip(all_obj_counts, all_pred_counts)]\n",
|
||||
"\n",
|
||||
" # Convert all_pred_counts to int64 before returning\n",
|
||||
" all_pred_counts = all_pred_counts.astype(np.int64)\n",
|
||||
" \n",
|
||||
" return np.array(all_pred_counts), np.mean(mape_scores)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "1428b97d",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"0it [00:00, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-023.png: 480x640 1 with_mask, 4.3ms\n",
|
||||
"Speed: 1.5ms preprocess, 4.3ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"1it [00:01, 1.65s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-819.png: 384x640 1 with_mask, 4.4ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-131.png: 448x640 4 with_masks, 2 without_masks, 4.2ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.2ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-032.png: 384x640 2 with_masks, 4.0ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-256.png: 448x640 13 with_masks, 4.0ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-201.png: 384x640 12 with_masks, 4.0ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-610.png: 352x640 5 with_masks, 4.3ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.3ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-143.png: 640x512 1 with_mask, 4.4ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.4ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-323.png: 608x640 2 with_masks, 4.3ms\n",
|
||||
"Speed: 1.1ms preprocess, 4.3ms inference, 0.7ms postprocess per image at shape (1, 3, 608, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-383.png: 640x512 1 with_mask, 4.0ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"10it [00:01, 7.73it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-307.png: 384x640 2 with_masks, 1 mask_weared_incorrect, 4.2ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-011.png: 448x640 26 with_masks, 1 mask_weared_incorrect, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-043.png: 640x448 1 with_mask, 4.2ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 448)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-098.png: 448x640 4 with_masks, 1 without_mask, 4.0ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-368.png: 448x640 9 with_masks, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-802.png: 448x640 4 with_masks, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-461.png: 448x640 8 with_masks, 3.9ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.9ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-574.png: 384x640 5 with_masks, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-226.png: 384x640 1 with_mask, 3.7ms\n",
|
||||
"Speed: 0.7ms preprocess, 3.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-444.png: 416x640 3 with_masks, 4.3ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.3ms inference, 0.7ms postprocess per image at shape (1, 3, 416, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"20it [00:01, 16.99it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-450.png: 480x640 4 with_masks, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-095.png: 640x512 1 without_mask, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-269.png: 480x640 2 with_masks, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-555.png: 384x640 4 with_masks, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-139.png: 384x640 17 with_masks, 2 without_masks, 2 mask_weared_incorrects, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-080.png: 384x640 1 with_mask, 3.8ms\n",
|
||||
"Speed: 0.8ms preprocess, 3.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-371.png: 384x640 1 with_mask, 1 mask_weared_incorrect, 3.8ms\n",
|
||||
"Speed: 0.7ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-554.png: 480x640 5 with_masks, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-660.png: 640x512 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-180.png: 640x512 1 with_mask, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"30it [00:01, 27.19it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-019.png: 384x640 3 with_masks, 2 without_masks, 1 mask_weared_incorrect, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-719.png: 384x640 4 with_masks, 1 without_mask, 3.8ms\n",
|
||||
"Speed: 0.7ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-410.png: 448x640 18 with_masks, 2 without_masks, 1 mask_weared_incorrect, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-796.png: 448x640 12 with_masks, 3.7ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.7ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-765.png: 640x512 1 with_mask, 4.2ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-305.png: 480x640 6 with_masks, 9 without_masks, 4 mask_weared_incorrects, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-528.png: 640x512 1 without_mask, 4.1ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-209.png: 352x640 4 with_masks, 13 without_masks, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-328.png: 640x512 1 with_mask, 4.0ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 512)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"39it [00:02, 36.68it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-833.png: 384x640 2 with_masks, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-512.png: 448x640 8 with_masks, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-286.png: 480x640 3 with_masks, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-370.png: 640x512 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-123.png: 512x640 3 with_masks, 4.6ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.6ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-829.png: 320x640 6 with_masks, 1 without_mask, 4.3ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.3ms inference, 0.7ms postprocess per image at shape (1, 3, 320, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-197.png: 384x640 1 with_mask, 4.2ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-793.png: 640x512 1 with_mask, 4.0ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-562.png: 352x640 1 with_mask, 1 mask_weared_incorrect, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-182.png: 448x640 2 with_masks, 1 mask_weared_incorrect, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"49it [00:02, 47.10it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-149.png: 480x640 7 with_masks, 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-560.png: 448x640 1 with_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-373.png: 448x640 13 with_masks, 2 without_masks, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-248.png: 640x512 1 without_mask, 4.2ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-292.png: 480x640 1 with_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-041.png: 480x640 8 with_masks, 3.9ms\n",
|
||||
"Speed: 1.0ms preprocess, 3.9ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-515.png: 640x512 1 with_mask, 4.1ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-148.png: 608x640 2 with_masks, 3 without_masks, 4.2ms\n",
|
||||
"Speed: 1.2ms preprocess, 4.2ms inference, 0.7ms postprocess per image at shape (1, 3, 608, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-036.png: 384x640 6 with_masks, 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"58it [00:02, 55.21it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-275.png: 640x544 1 with_mask, 4.5ms\n",
|
||||
"Speed: 1.1ms preprocess, 4.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 544)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-690.png: 416x640 19 with_masks, 1 without_mask, 4.2ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.2ms inference, 0.7ms postprocess per image at shape (1, 3, 416, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-581.png: 640x512 1 with_mask, 4.3ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-607.png: 448x640 2 with_masks, 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-227.png: 448x640 11 with_masks, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-184.png: 352x640 16 with_masks, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-387.png: 384x640 5 with_masks, 1 mask_weared_incorrect, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-169.png: 640x512 1 with_mask, 4.1ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-411.png: 448x640 7 with_masks, 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"67it [00:02, 62.86it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-742.png: 640x512 1 with_mask, 4.2ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-621.png: 384x640 3 with_masks, 2 without_masks, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-280.png: 448x640 14 with_masks, 7 without_masks, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-637.png: 384x640 6 with_masks, 4.1ms\n",
|
||||
"Speed: 0.7ms preprocess, 4.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-745.png: 640x512 1 with_mask, 4.1ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 512)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-606.png: 448x640 4 with_masks, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-152.png: 480x640 8 with_masks, 4.1ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-296.png: 384x640 27 with_masks, 15 without_masks, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-699.png: 448x640 1 with_mask, 6 without_masks, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r",
|
||||
"76it [00:02, 69.31it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-094.png: 384x640 5 with_masks, 1 without_mask, 1 mask_weared_incorrect, 4.0ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-130.png: 640x544 2 with_masks, 2 without_masks, 4.0ms\n",
|
||||
"Speed: 1.0ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 544)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-086.png: 480x640 3 with_masks, 1 without_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-589.png: 640x448 1 with_mask, 1 mask_weared_incorrect, 4.2ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 448)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-058.png: 448x640 13 with_masks, 4.0ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-377.png: 480x640 1 with_mask, 4.1ms\n",
|
||||
"Speed: 0.9ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-260.png: 480x640 50 with_masks, 3.8ms\n",
|
||||
"Speed: 0.9ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-594.png: 448x640 8 with_masks, 4.1ms\n",
|
||||
"Speed: 0.8ms preprocess, 4.1ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)\n",
|
||||
"\n",
|
||||
"image 1/1 /home/wuhanstudio/Documents/Marking/Template/data/MaskedFace/val/images/mask-598.png: 640x512 1 with_mask, 3.9ms\n",
|
||||
"Speed: 1.0ms preprocess, 3.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 512)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"85it [00:02, 32.88it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predicted_counts, mape_score = count_masks(model, test_dataset)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67dda1aa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## MAPE"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e7624ff3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_mape(prediction, truth):\n",
|
||||
" mape = np.mean( np.abs(truth - prediction) / np.maximum(truth, np.ones_like(truth)) ) * 100\n",
|
||||
" return mape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fbb7aa74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# X2d0f9f39\n",
|
||||
"# predicted_counts[:, [0, 1]] = predicted_counts[:, [1, 0]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "028f3e71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"predicted_counts[:, [1, 2]] = predicted_counts[:, [2, 1]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "c9176cc8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MAPE = compute_mape(predicted_counts, gt_counts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "828484ae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"133.83205417471694\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(MAPE)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b29e3ba9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Final Score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "9b170114",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Score: 0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"if MAPE <= 10:\n",
|
||||
" print(\"Score: \", 25*1.0)\n",
|
||||
"elif MAPE <= 15:\n",
|
||||
" print(\"Score: \", 25*0.875)\n",
|
||||
"elif MAPE <= 20:\n",
|
||||
" print(\"Score: \", 25*0.75)\n",
|
||||
"elif MAPE <= 25:\n",
|
||||
" print(\"Score: \", 25*0.625)\n",
|
||||
"elif MAPE <= 30:\n",
|
||||
" print(\"Score: \", 25*0.5)\n",
|
||||
"else:\n",
|
||||
" print(\"Score: \", 0) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "258ec405",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "what",
|
||||
"language": "python",
|
||||
"name": "what"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
BIN
data/points.npy
BIN
data/points.npy
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
data/yolov5.pt
Normal file
BIN
data/yolov5.pt
Normal file
Binary file not shown.
BIN
data/yolov8.pt
Normal file
BIN
data/yolov8.pt
Normal file
Binary file not shown.
Reference in New Issue
Block a user