dnn-fdtm/dnn-fdtm_ree.ipynb


Prerequisites

In [ ]:
!pip install numpy matplotlib pandas scikit-learn torch
In [ ]:
!python -V

Data Preprocessing

In [8]:
import pickle
import numpy as np
In [9]:
np.set_printoptions(suppress=True)
In [10]:
with open("res.pickle", "rb") as f:
    X = pickle.load(f)
In [11]:
X = X.T
In [12]:
X.shape
Out[12]:
(1000000, 493)
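At a million rows by 493 columns this array is already several gigabytes in RAM, which is what motivates the HDF5 and vaex storage experiments below. A quick check (assuming X is float64):

In [ ]:
# 1e6 rows x 493 cols x 8 bytes (float64) ≈ 3.9 GB
X.nbytes / 1e9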
In [13]:
layer_1 = np.linspace(10, 250, 100, endpoint=True)
layer_2 = np.linspace(0.5, 10, 100, endpoint=True)
layer_3 = np.linspace(710, 1700, 100, endpoint=True)
In [14]:
y = np.zeros((X.shape[0], 3))
In [15]:
# Enumerate all 100**3 = 1,000,000 parameter combinations,
# l1 varying fastest (assumed to match the row ordering of X)
i = 0
for l3 in layer_3:
    for l2 in layer_2:
        for l1 in layer_1:
            y[i] = [l3, l2, l1]
            i += 1
In [16]:
y.shape
Out[16]:
(1000000, 3)
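For reference, the same label grid can be built without the explicit triple loop. A minimal sketch using itertools.product, which varies its last argument fastest, just like the innermost l1 loop above:

In [ ]:
import itertools
# Cartesian product in the same (l3, l2, l1) order, l1 varying fastest
y_alt = np.array(list(itertools.product(layer_3, layer_2, layer_1)))
assert np.allclose(y_alt, y)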

HDF5

Write to HDF5

In [19]:
import pandas as pd
In [25]:
df_X = pd.DataFrame(X)
df_y = pd.DataFrame(y)
In [6]:
store = pd.HDFStore('res.h5')
INFO:MainThread:numexpr.utils:NumExpr defaulting to 8 threads.
In [26]:
store['X'] = df_X
store['y'] = df_y
In [22]:
store.close()
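The open/store/close sequence can also be written with a context manager, which guarantees the file is closed even if a write raises. A minimal sketch:

In [ ]:
with pd.HDFStore('res.h5') as store:
    store['X'] = df_X
    store['y'] = df_y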

Read from HDF5

In [23]:
res = pd.read_hdf('res.h5', 'X')
In [37]:
res.shape
Out[37]:
(1000000, 493)

Vaex

In [26]:
import vaex
In [45]:
!python -V
Python 3.8.3
In [51]:
y[:, 0]
Out[51]:
array([ 710.,  710.,  710., ..., 1700., 1700., 1700.])
In [59]:
vaex_df = vaex.from_arrays(x=X, y=y)
In [63]:
X.shape
Out[63]:
(1000000, 493)
In [64]:
y.shape
Out[64]:
(1000000, 3)
In [62]:
vaex_df
Out[62]:
#        x                                           y
0        [0.93921708 1. 0.96758555 0.88402555 0....  [710.  0.5  10.]
1        [0.9392044  1. 0.96759943 0.88406149 0....  [710.  0.5  12.42424242]
2        [0.93918573 1. 0.9676289  0.88417059 0....  [710.  0.5  14.84848485]
3        [0.93915796 1. 0.96770218 0.88444087 0....  [710.  0.5  17.27272727]
4        [0.93910895 1. 0.96785629 0.88496242 0....  [710.  0.5  19.6969697]
...      ...                                         ...
999,995  [0.90888059 1. 0.99898726 0.9374508  0....  [1700.  10.  240.3030303]
999,996  [0.90857156 1. 0.99928981 0.93796907 0....  [1700.  10.  242.72727273]
999,997  [0.90826533 1. 0.99958941 0.93848211 0....  [1700.  10.  245.15151515]
999,998  [0.90796191 1. 0.99988598 0.93898997 0....  [1700.  10.  247.57575758]
999,999  [0.90749824 0.99982035 1. 0.93932393 0....  [1700.  10.  250.]
In [61]:
vaex_df.export_hdf5(path='res.hdf5', progress=False)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-61-5408838d5df1> in <module>
----> 1 vaex_df.export_hdf5(path='res.hdf5', progress=False)

D:\Anaconda3\lib\site-packages\vaex\dataframe.py in export_hdf5(self, path, column_names, byteorder, shuffle, selection, progress, virtual, sort, ascending)
   5802         """
   5803         import vaex.export
-> 5804         vaex.export.export_hdf5(self, path, column_names, byteorder, shuffle, selection, progress=progress, virtual=virtual, sort=sort, ascending=ascending)
   5805 
   5806     def export_fits(self, path, column_names=None, shuffle=False, selection=False, progress=None, virtual=True, sort=None, ascending=True):

D:\Anaconda3\lib\site-packages\vaex\export.py in export_hdf5(dataset, path, column_names, byteorder, shuffle, selection, progress, virtual, sort, ascending)
    310     kwargs = locals()
    311     import vaex.hdf5.export
--> 312     vaex.hdf5.export.export_hdf5(**kwargs)
    313 
    314 

D:\Anaconda3\lib\site-packages\vaex\hdf5\export.py in export_hdf5(dataset, path, column_names, byteorder, shuffle, selection, progress, virtual, sort, ascending)
    243     dataset_output = vaex.hdf5.dataset.Hdf5MemoryMapped(path, write=True)
    244 
--> 245     column_names = vaex.export._export(dataset_input=dataset, dataset_output=dataset_output, path=path, random_index_column=random_index_name,
    246                                        column_names=column_names, selection=selection, shuffle=shuffle, byteorder=byteorder,
    247                                        progress=progress, sort=sort, ascending=ascending)

D:\Anaconda3\lib\site-packages\vaex\export.py in _export(dataset_input, dataset_output, random_index_column, path, column_names, byteorder, shuffle, selection, progress, virtual, sort, ascending)
    139         for future in futures:
    140             try:
--> 141                 future.result(0.1/4)
    142             except concurrent.futures.TimeoutError:
    143                 done = False

D:\Anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433 
    434             self._condition.wait(timeout)

D:\Anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
    386     def __get_result(self):
    387         if self._exception:
--> 388             raise self._exception
    389         else:
    390             return self._result

D:\Anaconda3\lib\concurrent\futures\thread.py in run(self)
     55 
     56         try:
---> 57             result = self.fn(*self.args, **self.kwargs)
     58         except BaseException as exc:
     59             self.future.set_exception(exc)

D:\Anaconda3\lib\site-packages\vaex\export.py in _export_column(dataset_input, dataset_output, column_name, shuffle, sort, selection, N, order_array, order_array_inverse, progress_status)
    211                             to_array[target_set_item] = values.filled(fill_value)
    212                         else:
--> 213                             to_array[target_set_item] = values
    214                         to_offset += no_values
    215 

ValueError: could not broadcast input array from shape (100000,493) into shape (100000)
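The export fails because from_arrays treats every keyword argument as a single column, and the 2-D X of shape (1000000, 493) cannot be flattened into one 1-D column on export. A possible workaround, sketched here but untested, is to hand vaex one 1-D array per feature:

In [ ]:
# One named 1-D column per feature and per target instead of 2-D arrays
columns = {f'x{i}': X[:, i] for i in range(X.shape[1])}
columns.update({f'y{i}': y[:, i] for i in range(y.shape[1])})
vaex_df = vaex.from_arrays(**columns)
vaex_df.export_hdf5(path='res.hdf5', progress=False)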

Train test split

In [10]:
from sklearn.model_selection import train_test_split
In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
In [12]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
In [55]:
# Index of the target column in y (0: layer_3, 1: layer_2, 2: layer_1);
# renamed from `property` to avoid shadowing the Python builtin
target_idx = 2
In [56]:
x_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train[:, target_idx]).float()

x_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test[:, target_idx]).float()
In [57]:
# Builds dataset with ALL data
origin_train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

# Splits randomly into train (90%) and validation (10%) datasets;
# computing the sizes this way guarantees they sum to the dataset length
n_train = int(len(origin_train_dataset) * 0.9)
train_dataset, val_dataset = random_split(origin_train_dataset, [n_train, len(origin_train_dataset) - n_train])

# Builds a loader for each dataset to perform mini-batch gradient descent
train_loader = DataLoader(dataset=train_dataset, batch_size=2000)
val_loader = DataLoader(dataset=val_dataset, batch_size=2000)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader  = DataLoader(dataset=test_dataset, batch_size=2000)
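A quick sanity check on one batch before building the model (shapes reflect the batch size of 2000 chosen above):

In [ ]:
xb, yb = next(iter(train_loader))
xb.shape, yb.shape  # expected: (torch.Size([2000, 493]), torch.Size([2000]))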

Model

In [58]:
import torch.nn as nn
In [59]:
class Net(nn.Module):
    """Fully connected regressor mapping 493 input features to one target value."""

    def __init__(self):
        super(Net, self).__init__()
        self.bn1 = nn.BatchNorm1d(X.shape[1])
        self.fc1 = nn.Linear(X.shape[1], 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.fc4 = nn.Linear(10, 1)

    def forward(self, x):
        x = self.bn1(x)               # normalize the raw inputs
        x = torch.tanh(self.fc1(x))
        x = self.bn2(x)
        x = torch.tanh(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))   # all three targets are positive, so ReLU at the output
        return x
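A dummy forward pass to verify the wiring; eval mode makes BatchNorm1d use its (untrained) running statistics, so even a tiny batch works:

In [ ]:
net = Net().eval()
with torch.no_grad():
    out = net(torch.randn(4, X.shape[1]))
out.shape  # expected: torch.Size([4, 1])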

Training

In [60]:
import torch.optim as optim
In [61]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
In [62]:
n_epochs = 20
In [63]:
def make_train_step(model, loss_fn, optimizer):
    # Returns a closure that performs one optimization step on a batch
    def train_step(x, y):
        model.train()
        yh = model(x)
        yh = torch.reshape(yh, (-1,))
        loss = loss_fn(yh, y)
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to stabilize the early, large-loss epochs
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        optimizer.step()
        return loss.item()
    return train_step
In [64]:
model = Net().to(device)

loss_fn = nn.MSELoss(reduction='mean')

# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

train_step = make_train_step(model, loss_fn, optimizer)
In [65]:
model.eval()
Out[65]:
Net(
  (bn1): BatchNorm1d(493, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=493, out_features=100, bias=True)
  (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=100, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=10, bias=True)
  (fc4): Linear(in_features=10, out_features=1, bias=True)
)
In [ ]:
training_losses = []
validation_losses = []

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    training_losses.append(training_loss)

    with torch.no_grad():
        model.eval()
        val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            yh = model(x_val)
            yh = torch.reshape(yh, (-1,))
            val_losses.append(loss_fn(yh, y_val).item())
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
[1] Training loss: 17419.833	 Validation loss: 11374.671
[2] Training loss: 5437.492	 Validation loss: 981.141
[3] Training loss: 170.984	 Validation loss: 17.300
[4] Training loss: 16.513	 Validation loss: 14.237
[5] Training loss: 14.340	 Validation loss: 9.781
[6] Training loss: 12.084	 Validation loss: 9.245
[7] Training loss: 10.989	 Validation loss: 9.115
[8] Training loss: 10.344	 Validation loss: 9.222
[9] Training loss: 9.963	 Validation loss: 7.374
[10] Training loss: 9.699	 Validation loss: 9.329
[11] Training loss: 9.408	 Validation loss: 9.093
[12] Training loss: 9.157	 Validation loss: 10.942
[13] Training loss: 9.157	 Validation loss: 9.646
[14] Training loss: 9.015	 Validation loss: 8.723
[15] Training loss: 8.873	 Validation loss: 8.430
[16] Training loss: 8.702	 Validation loss: 8.981
[17] Training loss: 8.599	 Validation loss: 9.332
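matplotlib was installed in the prerequisites but never used; a minimal sketch for plotting the loss curves collected above:

In [ ]:
import matplotlib.pyplot as plt

plt.plot(training_losses, label='training')
plt.plot(validation_losses, label='validation')
plt.yscale('log')  # the losses span several orders of magnitude
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()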
In [ ]:
# model.state_dict()
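The commented-out state_dict hints at checkpointing; a minimal sketch for saving and restoring the trained weights (the file name is hypothetical):

In [ ]:
torch.save(model.state_dict(), 'dnn_fdtm.pt')  # hypothetical path
# later, restore into a fresh instance:
model = Net().to(device)
model.load_state_dict(torch.load('dnn_fdtm.pt', map_location=device))
model.eval()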

Testing

In [26]:
def mean_absolute_percentage_error(y_true, y_pred):
    return torch.mean(torch.abs((y_true - y_pred) / y_true)) * 100
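For cross-checking, scikit-learn (0.24+) provides the same metric as a fraction rather than a percentage; a small sketch of the equivalence:

In [ ]:
from sklearn.metrics import mean_absolute_percentage_error as sk_mape
a, b = np.array([100.0, 200.0]), np.array([110.0, 190.0])
sk_mape(a, b) * 100  # 7.5, matching the torch version above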
In [27]:
model.eval()
with torch.no_grad():
    x_test_tensor = x_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)
    y_pred = model(x_test_tensor).squeeze()
In [28]:
test_loss = loss_fn(y_pred, y_test_tensor)
print(test_loss)
tensor(3279.8179)
The three values below come from separate training runs, one per target (target_idx = 2, 1, 0 respectively):

In [92]:
print(f"Mean absolute percentage error for L1: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
Mean absolute percentage error for L1: 2.51%
In [31]:
print(f"Mean absolute percentage error for L2: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
Mean absolute percentage error for L2: 3.12%
In [29]:
print(f"Mean absolute percentage error for L3: {mean_absolute_percentage_error(y_test_tensor.cpu(), y_pred.cpu()):.2f}%")
Mean absolute percentage error for L3: 3.09%

Real-world case

In [31]:
import pandas as pd
In [44]:
case1 = pd.read_csv('case1.txt', header=None)
In [45]:
case1 = np.reshape(np.array(case1), [1, -1])
In [46]:
case1_tensor = torch.from_numpy(case1).float()
In [94]:
model.eval()
pred_1 = model(case1_tensor)
The three outputs below come from separate runs, each trained to predict a different property (L1, L2, and L3 respectively):

In [95]:
pred_1
Out[95]:
tensor([[150.0873]], grad_fn=<ReluBackward0>)
In [53]:
pred_1
Out[53]:
tensor([[1.1310]], grad_fn=<ReluBackward0>)
In [75]:
pred_1
Out[75]:
tensor([[1472.1879]], grad_fn=<ReluBackward0>)
In [ ]:
case2 = pd.read_csv('case2.txt', header=None)
case2 = np.reshape(np.array(case2), [1, -1])
case2_tensor = torch.from_numpy(case2).float()
model.eval()
pred_2 = model(case2_tensor)

As with case 1, the outputs below are from separate runs (predicting L2 and L3 respectively):

In [54]:
pred_2
Out[54]:
tensor([[2.4505]], grad_fn=<ReluBackward0>)
In [35]:
pred_2
Out[35]:
tensor([[1467.8439]], grad_fn=<ReluBackward0>)
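The grad_fn in the outputs above shows these predictions still tracked gradients; for pure inference, torch.no_grad() avoids that bookkeeping. A sketch using the tensors from this section:

In [ ]:
model.eval()
with torch.no_grad():
    pred = model(case2_tensor)
pred  # plain tensor, no grad_fn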