Prerequisites¶
In [ ]:
# scikit-learn is the package name on PyPI (imported as sklearn);
# torch, vaex and tables (PyTables, required by pandas' HDFStore) are also used below.
!pip install numpy matplotlib pandas scikit-learn torch vaex tables
In [ ]:
!python -V
Data Preprocessing¶
In [8]:
import pickle
import numpy as np
In [9]:
np.set_printoptions(suppress=True)
In [10]:
# Load the pickled feature matrix; the with-block closes the file handle.
with open("res.pickle", "rb") as f:
    X = pickle.load(f)
In [11]:
X = X.T
In [12]:
X.shape
Out[12]:
In [13]:
# 100 candidate values for each of the three layer parameters
layer_1 = np.linspace(10, 250, 100, endpoint=True)
layer_2 = np.linspace(0.5, 10, 100, endpoint=True)
layer_3 = np.linspace(710, 1700, 100, endpoint=True)
In [14]:
y = np.zeros((X.shape[0], 3))
In [15]:
i = 0
for l3 in layer_3:
    for l2 in layer_2:
        for l1 in layer_1:
            y[i] = [l3, l2, l1]
            i += 1
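The same label grid can be built without the explicit triple loop. A minimal sketch using itertools.product, which varies its last argument fastest and therefore reproduces the nested-loop ordering above:
In [ ]:
import itertools

# (l3, l2, l1) tuples in the same order as the nested loops.
y_alt = np.array(list(itertools.product(layer_3, layer_2, layer_1)))
np.testing.assert_allclose(y_alt, y[:len(y_alt)])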
In [16]:
y.shape
Out[16]:
HDF5¶
Write to HDF5
In [19]:
import pandas as pd
In [25]:
df_X = pd.DataFrame(X)
df_y = pd.DataFrame(y)
In [6]:
store = pd.HDFStore('res.h5')
In [26]:
store['X'] = df_X
store['y'] = df_y
In [22]:
store.close()
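Opening the store as a context manager is a safer equivalent of the three cells above, since it closes the file even if a write fails; a minimal sketch:
In [ ]:
with pd.HDFStore('res.h5') as store:
    store['X'] = df_X
    store['y'] = df_y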
Read from HDF5
In [23]:
res = pd.read_hdf('res.h5', 'X')
In [37]:
res.shape
Out[37]:
Vaex
In [26]:
import vaex
In [51]:
y[:, 0]
Out[51]:
In [59]:
vaex_df = vaex.from_arrays(x=X, y=y)
In [63]:
X.shape
Out[63]:
In [64]:
y.shape
Out[64]:
In [62]:
vaex_df
Out[62]:
In [61]:
vaex_df.export_hdf5(path='res.hdf5', progress=False)
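Vaex memory-maps HDF5 files rather than loading them into RAM, so reading the export back is cheap; a quick sketch:
In [ ]:
# Lazily opens the exported file (memory-mapped, not read into memory).
df = vaex.open('res.hdf5')
df.head(5)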
Train test split¶
In [10]:
from sklearn.model_selection import train_test_split
In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
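A quick sanity check of the split: with test_size=0.05, 5% of the rows go to the test set and the rest to training.
In [ ]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)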
In [12]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
In [55]:
# Index of the target column in y: 0 -> layer_3, 1 -> layer_2, 2 -> layer_1.
# Avoid the name `property`, which shadows the Python builtin.
target_col = 2
In [56]:
x_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train[:, target_col]).float()
x_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test[:, target_col]).float()
In [57]:
# Builds dataset with ALL training data
origin_train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
# Splits randomly into train and validation datasets (90% / 10%);
# deriving the second length by subtraction guarantees the sizes sum exactly.
n_train = int(len(origin_train_dataset) * 0.9)
train_dataset, val_dataset = random_split(origin_train_dataset, [n_train, len(origin_train_dataset) - n_train])
# Builds a loader for each dataset to perform mini-batch gradient descent;
# the training loader reshuffles its batches every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=2000)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=2000)
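Pulling a single batch from the loader is a cheap way to confirm tensor shapes before training; a minimal check:
In [ ]:
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # (2000, n_features) and (2000,)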
Model¶
In [58]:
import torch.nn as nn
In [59]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.bn1 = nn.BatchNorm1d(X.shape[1])
        self.fc1 = nn.Linear(X.shape[1], 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.fc4 = nn.Linear(10, 1)

    def forward(self, x):
        x = self.bn1(x)
        x = self.fc1(x)
        x = torch.tanh(x)
        x = self.bn2(x)
        x = self.fc2(x)
        x = torch.tanh(x)
        x = self.fc3(x)
        x = torch.relu(x)
        x = self.fc4(x)
        # Final ReLU clamps predictions to be non-negative,
        # matching the strictly positive layer-parameter targets.
        x = torch.relu(x)
        return x
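A dummy forward pass verifies that the layer dimensions line up; a short sketch, assuming X is already loaded (eval mode lets BatchNorm run on any batch size):
In [ ]:
net = Net()
net.eval()  # use BatchNorm running statistics instead of batch statistics
with torch.no_grad():
    dummy = torch.randn(4, X.shape[1])
    print(net(dummy).shape)  # expected: torch.Size([4, 1])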
Training¶
In [60]:
import torch.optim as optim
In [61]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
In [62]:
n_epochs = 20
In [63]:
def make_train_step(model, loss_fn, optimizer):
    # Returns a closure that performs one optimization step on a mini-batch.
    def train_step(x, y):
        model.train()
        yh = model(x)
        yh = torch.reshape(yh, (-1,))
        loss = loss_fn(y, yh)
        loss.backward()
        # Clip gradients to stabilize training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()
    return train_step
In [64]:
model = Net().to(device)
loss_fn = nn.MSELoss(reduction='mean')
# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
train_step = make_train_step(model, loss_fn, optimizer)
In [65]:
model.eval()
Out[65]:
In [ ]:
training_losses = []
validation_losses = []
for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    training_losses.append(training_loss)

    # Evaluate on the validation set without tracking gradients.
    with torch.no_grad():
        val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            model.eval()
            yh = model(x_val)
            yh = torch.reshape(yh, (-1,))
            val_loss = loss_fn(y_val, yh).item()
            val_losses.append(val_loss)
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
In [ ]:
# model.state_dict()
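To keep the trained weights, the usual PyTorch pattern is to save the state_dict; a minimal sketch (the filename is an arbitrary choice):
In [ ]:
# Save only the learned parameters.
torch.save(model.state_dict(), 'net.pt')
# To restore later: rebuild the architecture, then load the weights.
# restored = Net().to(device)
# restored.load_state_dict(torch.load('net.pt'))
# restored.eval()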
Testing¶
In [26]:
def mean_absolute_percentage_error(y_true, y_pred):
    return torch.mean(torch.abs((y_true - y_pred) / y_true)) * 100
In [27]:
x_test_tensor = x_test_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)
# Predict in eval mode and without gradient tracking.
model.eval()
with torch.no_grad():
    y_pred = model(x_test_tensor).squeeze()
In [28]:
test_loss = loss_fn(y_test_tensor, y_pred)
print(test_loss.item())
The next three cells report the same metric from separate runs of the notebook, re-trained once per target column (target_col = 2, 1 and 0 for L1, L2 and L3 respectively).
In [92]:
print(f"The mean absolute percentage error for L1: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
In [31]:
print(f"The mean absolute percentage error for L2: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
In [29]:
print(f"The mean absolute percentage error for L3: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
Real-world case¶
In [31]:
import pandas as pd
In [44]:
case1 = pd.read_csv('case1.txt', header=None)
In [45]:
case1 = np.reshape(np.array(case1), [1, -1])
In [46]:
case1_tensor = torch.from_numpy(case1).float()
In [94]:
model.eval()
with torch.no_grad():
    pred_1 = model(case1_tensor)
In [95]:
pred_1
Out[95]:
In [ ]:
case2 = pd.read_csv('case2.txt', header=None)
case2 = np.reshape(np.array(case2), [1, -1])
case2_tensor = torch.from_numpy(case2).float()
model.eval()
with torch.no_grad():
    pred_2 = model(case2_tensor)
In [ ]:
pred_2