Prerequisites¶
In [ ]:
!pip install numpy matplotlib pandas scikit-learn torch
In [1]:
!python -V
Data Preprocessing¶
In [3]:
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True)
Load the raw CSV data.
In [6]:
X = pd.read_csv('./data.csv', header=None)
X = X.to_numpy()
The raw data has shape 1,000,000 × 300.
In [7]:
X.shape
Out[7]:
(1000000, 300)
In [8]:
layer_1 = np.linspace(10, 250, 100, endpoint=True)
layer_2 = np.linspace(0.5, 10, 100, endpoint=True)
layer_3 = np.linspace(710, 1700, 100, endpoint=True)
The training set contains 1,000,000 samples with 3 target values to predict (in practice each model predicts only one value, so three separate models are trained).
In [9]:
y = np.zeros((X.shape[0], 3))
In [10]:
i = 0
for l3 in layer_3:
    for l2 in layer_2:
        for l1 in layer_1:
            y[i] = [l3, l2, l1]
            i = i + 1
In [11]:
y.shape
Out[11]:
(1000000, 3)
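As a cross-check, the same target grid can be built without the explicit triple loop; a minimal sketch using itertools.product, which follows the same ordering (layer_3 outermost, layer_1 innermost):
In [ ]:
# Vectorized equivalent of the triple loop above; product advances the
# rightmost argument fastest, matching the nested-loop ordering.
import itertools
y_alt = np.array(list(itertools.product(layer_3, layer_2, layer_1)))
assert np.allclose(y, y_alt)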
Train test split¶
Split the data into training and test sets.
In [12]:
from sklearn.model_selection import train_test_split
In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
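With test_size=0.05 and 1,000,000 rows, the split should leave 950,000 training samples and 50,000 test samples; a quick sanity check:
In [ ]:
# Expect (950000, 300), (50000, 300), (950000, 3), (50000, 3)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)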
In [14]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
Select which of the three target values the current model predicts.
In [15]:
property = 1
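Given how y was built above (y[i] = [l3, l2, l1]), the column indices map to targets as in the sketch below; property = 1 therefore selects the layer_2 value. The target_names dict is only an illustrative helper, not part of the original notebook.
In [ ]:
# Column index -> target, following y[i] = [l3, l2, l1]:
#   0 -> layer_3, 1 -> layer_2, 2 -> layer_1
target_names = {0: 'layer_3', 1: 'layer_2', 2: 'layer_1'}
print(f"Predicting target: {target_names[property]}")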
Convert the data to PyTorch tensors.
In [16]:
x_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train[:, property]).float()
x_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test[:, property]).float()
In [17]:
# Builds dataset with ALL data
origin_train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
# Splits randomly into train and validation datasets
train_dataset, val_dataset = random_split(origin_train_dataset, [int(x_train_tensor.shape[0] * 0.9), int(x_train_tensor.shape[0] * 0.1)])
# Builds a loader for each dataset to perform mini-batch gradient descent
train_loader = DataLoader(dataset=train_dataset, batch_size=2000)
val_loader = DataLoader(dataset=val_dataset, batch_size=2000)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=2000)
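A quick check that the loaders yield mini-batches of the expected shape (a sketch; it just pulls the first batch from the training loader):
In [ ]:
# Expect x of shape (2000, 300) and y of shape (2000,)
x_sample, y_sample = next(iter(train_loader))
print(x_sample.shape, y_sample.shape)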
Define the Model¶
In [18]:
import torch.nn as nn
In [19]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Normalize the input features, then shrink X.shape[1] -> 100 -> 50 -> 10 -> 1
        self.bn1 = nn.BatchNorm1d(X.shape[1])
        self.fc1 = nn.Linear(X.shape[1], 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.fc4 = nn.Linear(10, 1)

    def forward(self, x):
        x = self.bn1(x)
        x = self.fc1(x)
        x = torch.tanh(x)
        x = self.bn2(x)
        x = self.fc2(x)
        x = torch.tanh(x)
        x = self.fc3(x)
        x = torch.relu(x)
        x = self.fc4(x)
        # Final ReLU keeps the regression output non-negative
        x = torch.relu(x)
        return x
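A quick shape check with a dummy batch (a sketch; the throwaway _net instance is not used later, and eval mode is used so BatchNorm does not need batch statistics):
In [ ]:
# Forward a random dummy batch to confirm the output shape is (batch, 1)
_net = Net()
_net.eval()
print(_net(torch.randn(4, X.shape[1])).shape)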
Training¶
In [20]:
import torch.optim as optim
Use the GPU if one is available, otherwise fall back to the CPU.
In [21]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
In [22]:
n_epochs = 50
In [23]:
def make_train_step(model, loss_fn, optimizer):
    # Returns a closure that performs one optimization step on a mini-batch
    def train_step(x, y):
        model.train()
        yh = model(x)
        yh = torch.reshape(yh, (-1,))
        loss = loss_fn(y, yh)
        loss.backward()
        # Clip gradients to a max norm of 0.25 before the optimizer step
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()
    return train_step
In [24]:
model = Net().to(device)
loss_fn = nn.MSELoss(reduction='mean')
# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
train_step = make_train_step(model, loss_fn, optimizer)
In [25]:
model.eval()
Out[25]:
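Before running the full loop below, train_step can be smoke-tested on a single mini-batch (a sketch; note that it performs one real optimization step, so the model weights are updated once):
In [ ]:
# One optimization step on a single mini-batch; the returned value is the batch MSE
xb, yb = next(iter(train_loader))
print(train_step(xb.to(device), yb.to(device)))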
In [51]:
training_losses = []
validation_losses = []
for epoch in range(n_epochs):
    # Training phase: one optimization step per mini-batch
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    training_losses.append(training_loss)

    # Validation phase: no gradient tracking, model in eval mode
    with torch.no_grad():
        val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            model.eval()
            yh = model(x_val)
            yh = torch.reshape(yh, (-1,))
            val_loss = loss_fn(y_val, yh).item()
            val_losses.append(val_loss)
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
Testing¶
In [53]:
def mean_absolute_percentage_error(y_true, y_pred):
    return torch.mean(torch.abs((y_true - y_pred) / y_true)) * 100
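A tiny worked example of the metric: true values 100 and 200 with predictions 110 and 180 give relative errors of 10% each, so the MAPE is 10%.
In [ ]:
# Expect tensor(10.): mean of |100-110|/100 and |200-180|/200, times 100
print(mean_absolute_percentage_error(torch.tensor([100., 200.]), torch.tensor([110., 180.])))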
In [54]:
x_test_tensor = x_test_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)
Use the trained model to predict on the test set.
In [ ]:
with torch.no_grad():  # inference only; the model is already in eval mode from the last validation pass
    y_pred = model(x_test_tensor).squeeze()
Compute the loss (MSE) on the test set.
In [55]:
test_loss = loss_fn(y_test_tensor, y_pred)
print(test_loss)
In [56]:
print(f"The mean of absolute percentage error for C: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
In [ ]: