
Deep Neural Networks

April 12, 2024

1 Fine-Tuning a Deep Neural Network From Scratch in Python


By Cristian Leo

1.1 Libraries
[53]: import numpy as np # linear algebra

import matplotlib.pyplot as plt # used for plotting graphs

from sklearn.datasets import load_digits # used to import the dataset
from sklearn.model_selection import train_test_split # to split the data into two parts

from sklearn.preprocessing import MinMaxScaler # for normalization
from sklearn.preprocessing import OneHotEncoder # for one-hot encoding

import optuna # for hyperparameter tuning

import logging

# Set up the logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import warnings # used to ignore warnings
warnings.filterwarnings("ignore")

1.2 Neural Network Class


[54]: class NeuralNetwork:
    """
    A feedforward neural network with a few optimization techniques.

    Parameters:
    -----------
    layers: list of int
        The number of neurons in each layer, including the input and output layers
    init_method: str
        The weight initialization method. Options are 'zeros', 'random', 'glorot_uniform', 'glorot_normal', 'he_uniform', and 'he_normal'.
    loss_func: str
        The loss function to use. Options are 'mse' for mean squared error, 'log_loss' for logistic loss, and 'categorical_crossentropy' for categorical crossentropy.
    dropout_rate: float
        The dropout rate for dropout regularization. Must be between 0 and 1.
    clip_type: str
        The gradient clipping strategy: 'value' clips each component, 'norm' rescales by the L2 norm.
    grad_clip: float
        The gradient clipping threshold.
    """
    def __init__(self,
                 layers,
                 init_method='glorot_uniform',  # 'zeros', 'random', 'glorot_uniform', 'glorot_normal', 'he_uniform', 'he_normal'
                 loss_func='mse',
                 dropout_rate=0.5,
                 clip_type='value',
                 grad_clip=5.0
                 ):
        self.layers = []
        self.loss_func = loss_func
        self.dropout_rate = dropout_rate
        self.clip_type = clip_type
        self.grad_clip = grad_clip
        self.init_method = init_method

        # Initialize layers
        for i in range(len(layers) - 1):
            if self.init_method == 'zeros':
                weights = np.zeros((layers[i], layers[i + 1]))
            elif self.init_method == 'random':
                weights = np.random.randn(layers[i], layers[i + 1])
            elif self.init_method == 'glorot_uniform':
                weights = self.glorot_uniform(layers[i], layers[i + 1])
            elif self.init_method == 'glorot_normal':
                weights = self.glorot_normal(layers[i], layers[i + 1])
            elif self.init_method == 'he_uniform':
                weights = self.he_uniform(layers[i], layers[i + 1])
            elif self.init_method == 'he_normal':
                weights = self.he_normal(layers[i], layers[i + 1])
            else:
                raise ValueError(f'Unknown initialization method {self.init_method}')

            self.layers.append({
                'weights': weights,
                'biases': np.zeros((1, layers[i + 1]))
            })

        # Track loss
        self.train_loss = []
        self.test_loss = []

    def __str__(self):
        """
        Return a string describing the neural network architecture.
        """
        structure = f"NN Layout:\nInput Layer: {len(self.layers[0]['weights'])} neurons"
        for i, layer in enumerate(self.layers[:-1]):
            structure += f"\nHidden Layer {i + 1}: {layer['weights'].shape[1]} neurons"
        structure += f"\nOutput Layer: {self.layers[-1]['weights'].shape[1]} neurons"
        structure += f"\nLoss Function: {self.loss_func}"
        return structure

    def glorot_uniform(self, fan_in, fan_out):
        """
        Glorot Uniform initialization (also known as Xavier Uniform initialization).

        Parameters:
        -----------
        fan_in: int
            The number of input units in the weight tensor
        fan_out: int
            The number of output units in the weight tensor

        Returns:
        --------
        numpy array
            The initialized weights
        """
        limit = np.sqrt(6 / (fan_in + fan_out))
        return np.random.uniform(-limit, limit, (fan_in, fan_out))

    def he_uniform(self, fan_in, fan_out):
        """
        He Uniform initialization.

        Parameters:
        -----------
        fan_in: int
            The number of input units in the weight tensor
        fan_out: int
            The number of output units in the weight tensor

        Returns:
        --------
        numpy array
            The initialized weights
        """
        limit = np.sqrt(6 / fan_in)  # He-uniform limit is sqrt(6 / fan_in)
        return np.random.uniform(-limit, limit, (fan_in, fan_out))

    def glorot_normal(self, fan_in, fan_out):
        """
        Glorot Normal initialization (also known as Xavier Normal initialization).

        Parameters:
        -----------
        fan_in: int
            The number of input units in the weight tensor
        fan_out: int
            The number of output units in the weight tensor

        Returns:
        --------
        numpy array
            The initialized weights
        """
        stddev = np.sqrt(2. / (fan_in + fan_out))
        return np.random.normal(0., stddev, size=(fan_in, fan_out))

def he_normal(self, fan_in, fan_out):


"""
He Normal initialization.

Parameters:
-----------
fan_in: int
The number of input units in the weight tensor
fan_out: int
The number of output units in the weight tensor

Returns:
--------
numpy array
The initialized weights
"""
stddev = np.sqrt(2. / fan_in)
return np.random.normal(0., stddev, size=(fan_in, fan_out))

    def forward(self, X, is_training=True):
        """
        Perform forward propagation.

        Parameters:
        -----------
        X: numpy array
            The input data
        is_training: bool
            Whether the forward pass is for training or testing/prediction

        Returns:
        --------
        numpy array
            The predicted output
        """
        self.a = [X]
        for i, layer in enumerate(self.layers):
            z = np.dot(self.a[-1], layer['weights']) + layer['biases']
            a = self.sigmoid(z)
            if is_training and i < len(self.layers) - 1:  # apply dropout to all layers except the output layer
                dropout_mask = np.random.rand(*a.shape) > self.dropout_rate
                # Note: this is plain (non-inverted) dropout; the surviving
                # activations are not rescaled by 1 / (1 - dropout_rate).
                a *= dropout_mask
            self.a.append(a)
        return self.a[-1]

    def backward(self, X, y, learning_rate):
        """
        Perform backpropagation.

        Parameters:
        -----------
        X: numpy array
            The input data
        y: numpy array
            The target output
        learning_rate: float
            The learning rate
        """
        m = X.shape[0]
        self.dz = [self.a[-1] - y]
        self.gradient_norms = []  # List to store the gradient norms

        for i in reversed(range(len(self.layers) - 1)):
            self.dz.append(np.dot(self.dz[-1], self.layers[i + 1]['weights'].T) * self.sigmoid_derivative(self.a[i + 1]))
            self.gradient_norms.append(np.linalg.norm(self.dz[-1]))  # Compute and store the norm of this layer's error signal

        self.dz = self.dz[::-1]
        self.gradient_norms = self.gradient_norms[::-1]  # Reverse the list to match the order of the layers

        for i in range(len(self.layers)):
            grads_w = np.dot(self.a[i].T, self.dz[i]) / m
            grads_b = np.sum(self.dz[i], axis=0, keepdims=True) / m

            # Gradient clipping
            if self.clip_type == 'value':
                grads_w = np.clip(grads_w, -self.grad_clip, self.grad_clip)
                grads_b = np.clip(grads_b, -self.grad_clip, self.grad_clip)
            elif self.clip_type == 'norm':
                grads_w = self.clip_by_norm(grads_w, self.grad_clip)
                grads_b = self.clip_by_norm(grads_b, self.grad_clip)

            self.layers[i]['weights'] -= learning_rate * grads_w
            self.layers[i]['biases'] -= learning_rate * grads_b

    def clip_by_norm(self, grads, clip_norm):
        """
        Clip gradients by norm.

        Parameters:
        -----------
        grads: numpy array
            The gradients
        clip_norm: float
            The threshold for clipping

        Returns:
        --------
        numpy array
            The clipped gradients
        """
        l2_norm = np.linalg.norm(grads)
        if l2_norm > clip_norm:
            grads = grads / l2_norm * clip_norm
        return grads

def sigmoid(self, x):


"""
Sigmoid activation function.

Parameters:
-----------
x: numpy array
The input data

Returns:
--------
numpy array
The output of the sigmoid function
"""
return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """
        Derivative of the sigmoid activation function.

        Parameters:
        -----------
        x: numpy array
            The sigmoid output (the activations, not the pre-activations)

        Returns:
        --------
        numpy array
            The output of the derivative of the sigmoid function
        """
        # Since x is already sigmoid(z), the derivative is simply x * (1 - x)
        return x * (1 - x)
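
A quick sanity check of the initializers and clip_by_norm, as a minimal sketch (the name nn_check is illustrative; any layer sizes work):

[ ]: # Glorot-uniform weights lie within +/- sqrt(6 / (fan_in + fan_out));
# He-normal weights have standard deviation near sqrt(2 / fan_in)
nn_check = NeuralNetwork([64, 32, 10])
w = nn_check.glorot_uniform(64, 32)
print(np.abs(w).max() <= np.sqrt(6 / (64 + 32)))                                  # True
print(np.isclose(nn_check.he_normal(64, 32).std(), np.sqrt(2 / 64), atol=0.02))   # True
g = 10.0 * np.ones((4, 4))                            # L2 norm = 40
print(np.linalg.norm(nn_check.clip_by_norm(g, 5.0)))  # ~5.0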

1.3 Trainer Class


[55]: class Trainer:
    """
    A class to train a neural network.

    Parameters:
    -----------
    model: NeuralNetwork
        The neural network model to train
    loss_func: str
        The loss function to use. Options are 'mse' for mean squared error, 'log_loss' for logistic loss, and 'categorical_crossentropy' for categorical crossentropy.
    """
    def __init__(self, model, loss_func='mse'):
        self.model = model
        self.loss_func = loss_func
        self.train_loss = []
        self.val_loss = []

    def calculate_loss(self, y_true, y_pred):
        """
        Calculate the loss.

        Parameters:
        -----------
        y_true: numpy array
            The true output
        y_pred: numpy array
            The predicted output

        Returns:
        --------
        float
            The loss
        """
        if self.loss_func == 'mse':
            return np.mean((y_pred - y_true)**2)
        elif self.loss_func == 'log_loss':
            return -np.mean(y_true*np.log(y_pred) + (1-y_true)*np.log(1-y_pred))
        elif self.loss_func == 'categorical_crossentropy':
            # Note: this averages over samples *and* classes, so values are
            # smaller than the conventional per-sample crossentropy by a
            # factor of the number of classes.
            return -np.mean(y_true*np.log(y_pred))
        else:
            raise ValueError('Invalid loss function')

    def train(self, X_train, y_train, X_val, y_val, epochs, learning_rate, early_stopping=True, patience=10):
        """
        Train the neural network.

        Parameters:
        -----------
        X_train: numpy array
            The training input data
        y_train: numpy array
            The training target output
        X_val: numpy array
            The validation input data
        y_val: numpy array
            The validation target output
        epochs: int
            The number of epochs to train the model
        learning_rate: float
            The learning rate
        early_stopping: bool
            Whether to stop training early if the validation loss doesn't improve for a number of epochs
        patience: int
            The number of epochs to wait for an improvement in the validation loss
        """
        best_loss = np.inf
        epochs_no_improve = 0

        for epoch in range(epochs):
            self.model.forward(X_train)
            self.model.backward(X_train, y_train, learning_rate)
            train_loss = self.calculate_loss(y_train, self.model.a[-1])
            self.train_loss.append(train_loss)

            self.model.forward(X_val)
            val_loss = self.calculate_loss(y_val, self.model.a[-1])
            self.val_loss.append(val_loss)

            # Log the training and validation loss every 50 epochs
            if epoch % 50 == 0:
                logger.info(f'Epoch {epoch}: loss = {train_loss:.3f}, val_loss = {val_loss:.3f}')

            # Early stopping
            if early_stopping:
                if val_loss < best_loss:
                    best_loss = val_loss
                    # Copy the parameters; storing references would let later
                    # in-place updates overwrite the "best" snapshot.
                    best_weights = [layer['weights'].copy() for layer in self.model.layers]
                    best_biases = [layer['biases'].copy() for layer in self.model.layers]
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1

                if epochs_no_improve == patience:
                    print('Early stopping!')
                    # Restore the best parameters
                    for i, layer in enumerate(self.model.layers):
                        layer['weights'] = best_weights[i]
                        layer['biases'] = best_biases[i]
                    break

    def plot_gradient_norms(self):
        # Each entry is a single scalar per layer, so plot with markers;
        # a bare plt.plot(scalar) would draw nothing visible.
        for i, gradient_norm in enumerate(self.model.gradient_norms):
            plt.plot(i + 1, gradient_norm, 'o', label=f'Layer {i + 1}')
        plt.legend()
        plt.show()
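
To sanity-check the loss definitions on concrete numbers, here is a minimal sketch with toy arrays (calculate_loss never touches the model, so model=None is fine here): for a one-hot target and prediction [0.1, 0.8, 0.1], the categorical crossentropy above is -mean(y * log(p)) = -log(0.8) / 3.

[ ]: y_true = np.array([[0., 1., 0.]])
y_pred = np.array([[0.1, 0.8, 0.1]])
t = Trainer(model=None, loss_func='categorical_crossentropy')
print(t.calculate_loss(y_true, y_pred))                      # -log(0.8) / 3 ≈ 0.0744
print(Trainer(None, 'mse').calculate_loss(y_true, y_pred))   # 0.06 / 3 = 0.02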

1.4 Load Dataset


[56]: # Load the digits dataset
digits = load_digits()

# Plot the first 10 images
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
axes = axes.ravel()

for i in range(10):
    axes[i].imshow(digits.images[i], cmap='gray')
    axes[i].axis('off')
    axes[i].set_title(f"Label: {digits.target[i]}")

plt.tight_layout()
plt.show()
1.5 Data Preprocessing
[57]: # Preprocess the dataset
scaler = MinMaxScaler()
X = scaler.fit_transform(digits.data)
y = digits.target

# One-hot encode the target output
# (newer scikit-learn versions use OneHotEncoder(sparse_output=False) instead)
encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Split the training set into a smaller training set and a validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
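
As a quick sketch of what the two splits produce (load_digits has 1,797 samples with 64 features and 10 classes; 20% goes to test, then 20% of the remainder to validation):

[ ]: print(X_train.shape, X_val.shape, X_test.shape)   # (1149, 64) (288, 64) (360, 64)
print(y_train.shape, y_val.shape, y_test.shape)        # (1149, 10) (288, 10) (360, 10)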

1.6 Create Neural Network
[81]: # Create an instance of the NeuralNetwork class
input_size = X.shape[1]
output_size = len(np.unique(y))
loss_func = 'categorical_crossentropy'
init_method = 'glorot_uniform'
epochs = 10000
learning_rate = 0.1
early_stopping = False
patience = 200
dropout_rate = 0.1

# Create the NN
nn = NeuralNetwork([input_size, 64, 64, output_size], loss_func=loss_func, init_method=init_method, dropout_rate=dropout_rate)

# Print the neural network architecture
print(nn)

NN Layout:
Input Layer: 64 neurons
Hidden Layer 1: 64 neurons
Hidden Layer 2: 64 neurons
Output Layer: 10 neurons
Loss Function: categorical_crossentropy

1.7 Train NN
[82]: trainer = Trainer(nn, loss_func)
trainer.train(X_train, y_train, X_val, y_val, epochs=epochs, learning_rate=learning_rate, early_stopping=early_stopping, patience=patience)

# Convert y_test from one-hot encoding to labels
y_test_labels = np.argmax(y_test, axis=1)

# Evaluate the performance of the neural network (dropout disabled for inference)
predictions = np.argmax(nn.forward(X_test, is_training=False), axis=1)
accuracy = np.mean(predictions == y_test_labels)
print(f"Accuracy: {accuracy:.2%}")

INFO:__main__:Epoch 0: loss = 0.084, val_loss = 0.111


INFO:__main__:Epoch 50: loss = 0.229, val_loss = 0.231
INFO:__main__:Epoch 100: loss = 0.229, val_loss = 0.231
INFO:__main__:Epoch 150: loss = 0.228, val_loss = 0.228
INFO:__main__:Epoch 200: loss = 0.228, val_loss = 0.229
INFO:__main__:Epoch 250: loss = 0.227, val_loss = 0.225
INFO:__main__:Epoch 300: loss = 0.226, val_loss = 0.226
INFO:__main__:Epoch 350: loss = 0.224, val_loss = 0.225
INFO:__main__:Epoch 400: loss = 0.221, val_loss = 0.224
INFO:__main__:Epoch 450: loss = 0.219, val_loss = 0.220
INFO:__main__:Epoch 500: loss = 0.216, val_loss = 0.219
INFO:__main__:Epoch 550: loss = 0.213, val_loss = 0.215
INFO:__main__:Epoch 600: loss = 0.206, val_loss = 0.210
INFO:__main__:Epoch 650: loss = 0.201, val_loss = 0.205
INFO:__main__:Epoch 700: loss = 0.195, val_loss = 0.195
INFO:__main__:Epoch 750: loss = 0.186, val_loss = 0.189
INFO:__main__:Epoch 800: loss = 0.177, val_loss = 0.184

INFO:__main__:Epoch 850: loss = 0.167, val_loss = 0.171
INFO:__main__:Epoch 900: loss = 0.160, val_loss = 0.160
INFO:__main__:Epoch 950: loss = 0.151, val_loss = 0.154
INFO:__main__:Epoch 1000: loss = 0.143, val_loss = 0.146
INFO:__main__:Epoch 1050: loss = 0.137, val_loss = 0.140
INFO:__main__:Epoch 1100: loss = 0.130, val_loss = 0.138
INFO:__main__:Epoch 1150: loss = 0.124, val_loss = 0.128
INFO:__main__:Epoch 1200: loss = 0.120, val_loss = 0.125
INFO:__main__:Epoch 1250: loss = 0.115, val_loss = 0.119
INFO:__main__:Epoch 1300: loss = 0.110, val_loss = 0.113
INFO:__main__:Epoch 1350: loss = 0.108, val_loss = 0.108
INFO:__main__:Epoch 1400: loss = 0.102, val_loss = 0.108
INFO:__main__:Epoch 1450: loss = 0.098, val_loss = 0.104
INFO:__main__:Epoch 1500: loss = 0.097, val_loss = 0.098
INFO:__main__:Epoch 1550: loss = 0.094, val_loss = 0.095
INFO:__main__:Epoch 1600: loss = 0.091, val_loss = 0.092
INFO:__main__:Epoch 1650: loss = 0.087, val_loss = 0.089
INFO:__main__:Epoch 1700: loss = 0.084, val_loss = 0.090
INFO:__main__:Epoch 1750: loss = 0.081, val_loss = 0.083
INFO:__main__:Epoch 1800: loss = 0.079, val_loss = 0.082
INFO:__main__:Epoch 1850: loss = 0.077, val_loss = 0.081
INFO:__main__:Epoch 1900: loss = 0.073, val_loss = 0.077
INFO:__main__:Epoch 1950: loss = 0.071, val_loss = 0.079
INFO:__main__:Epoch 2000: loss = 0.069, val_loss = 0.073
INFO:__main__:Epoch 2050: loss = 0.068, val_loss = 0.073
INFO:__main__:Epoch 2100: loss = 0.067, val_loss = 0.071
INFO:__main__:Epoch 2150: loss = 0.063, val_loss = 0.066
INFO:__main__:Epoch 2200: loss = 0.062, val_loss = 0.069
INFO:__main__:Epoch 2250: loss = 0.062, val_loss = 0.066
INFO:__main__:Epoch 2300: loss = 0.060, val_loss = 0.062
INFO:__main__:Epoch 2350: loss = 0.058, val_loss = 0.064
INFO:__main__:Epoch 2400: loss = 0.056, val_loss = 0.060
INFO:__main__:Epoch 2450: loss = 0.054, val_loss = 0.056
INFO:__main__:Epoch 2500: loss = 0.055, val_loss = 0.059
INFO:__main__:Epoch 2550: loss = 0.053, val_loss = 0.055
INFO:__main__:Epoch 2600: loss = 0.050, val_loss = 0.054
INFO:__main__:Epoch 2650: loss = 0.051, val_loss = 0.052
INFO:__main__:Epoch 2700: loss = 0.051, val_loss = 0.060
INFO:__main__:Epoch 2750: loss = 0.048, val_loss = 0.055
INFO:__main__:Epoch 2800: loss = 0.046, val_loss = 0.048
INFO:__main__:Epoch 2850: loss = 0.047, val_loss = 0.051
INFO:__main__:Epoch 2900: loss = 0.044, val_loss = 0.049
INFO:__main__:Epoch 2950: loss = 0.043, val_loss = 0.045
INFO:__main__:Epoch 3000: loss = 0.045, val_loss = 0.046
INFO:__main__:Epoch 3050: loss = 0.044, val_loss = 0.047
INFO:__main__:Epoch 3100: loss = 0.043, val_loss = 0.042
INFO:__main__:Epoch 3150: loss = 0.042, val_loss = 0.051
INFO:__main__:Epoch 3200: loss = 0.041, val_loss = 0.041

INFO:__main__:Epoch 3250: loss = 0.039, val_loss = 0.044
INFO:__main__:Epoch 3300: loss = 0.039, val_loss = 0.041
INFO:__main__:Epoch 3350: loss = 0.039, val_loss = 0.043
INFO:__main__:Epoch 3400: loss = 0.037, val_loss = 0.044
INFO:__main__:Epoch 3450: loss = 0.036, val_loss = 0.038
INFO:__main__:Epoch 3500: loss = 0.035, val_loss = 0.042
INFO:__main__:Epoch 3550: loss = 0.036, val_loss = 0.040
INFO:__main__:Epoch 3600: loss = 0.036, val_loss = 0.040
INFO:__main__:Epoch 3650: loss = 0.033, val_loss = 0.039
INFO:__main__:Epoch 3700: loss = 0.032, val_loss = 0.037
INFO:__main__:Epoch 3750: loss = 0.034, val_loss = 0.039
INFO:__main__:Epoch 3800: loss = 0.033, val_loss = 0.036
INFO:__main__:Epoch 3850: loss = 0.031, val_loss = 0.037
INFO:__main__:Epoch 3900: loss = 0.032, val_loss = 0.033
INFO:__main__:Epoch 3950: loss = 0.031, val_loss = 0.034
INFO:__main__:Epoch 4000: loss = 0.031, val_loss = 0.034
INFO:__main__:Epoch 4050: loss = 0.031, val_loss = 0.035
INFO:__main__:Epoch 4100: loss = 0.030, val_loss = 0.033
INFO:__main__:Epoch 4150: loss = 0.029, val_loss = 0.033
INFO:__main__:Epoch 4200: loss = 0.028, val_loss = 0.034
INFO:__main__:Epoch 4250: loss = 0.027, val_loss = 0.033
INFO:__main__:Epoch 4300: loss = 0.030, val_loss = 0.031
INFO:__main__:Epoch 4350: loss = 0.029, val_loss = 0.031
INFO:__main__:Epoch 4400: loss = 0.028, val_loss = 0.031
INFO:__main__:Epoch 4450: loss = 0.028, val_loss = 0.031
INFO:__main__:Epoch 4500: loss = 0.027, val_loss = 0.029
INFO:__main__:Epoch 4550: loss = 0.026, val_loss = 0.031
INFO:__main__:Epoch 4600: loss = 0.026, val_loss = 0.030
INFO:__main__:Epoch 4650: loss = 0.026, val_loss = 0.029
INFO:__main__:Epoch 4700: loss = 0.027, val_loss = 0.031
INFO:__main__:Epoch 4750: loss = 0.027, val_loss = 0.029
INFO:__main__:Epoch 4800: loss = 0.025, val_loss = 0.029
INFO:__main__:Epoch 4850: loss = 0.024, val_loss = 0.026
INFO:__main__:Epoch 4900: loss = 0.025, val_loss = 0.029
INFO:__main__:Epoch 4950: loss = 0.024, val_loss = 0.026
INFO:__main__:Epoch 5000: loss = 0.025, val_loss = 0.027
INFO:__main__:Epoch 5050: loss = 0.023, val_loss = 0.028
INFO:__main__:Epoch 5100: loss = 0.023, val_loss = 0.024
INFO:__main__:Epoch 5150: loss = 0.024, val_loss = 0.028
INFO:__main__:Epoch 5200: loss = 0.022, val_loss = 0.025
INFO:__main__:Epoch 5250: loss = 0.023, val_loss = 0.028
INFO:__main__:Epoch 5300: loss = 0.024, val_loss = 0.026
INFO:__main__:Epoch 5350: loss = 0.023, val_loss = 0.026
INFO:__main__:Epoch 5400: loss = 0.021, val_loss = 0.025
INFO:__main__:Epoch 5450: loss = 0.022, val_loss = 0.023
INFO:__main__:Epoch 5500: loss = 0.020, val_loss = 0.027
INFO:__main__:Epoch 5550: loss = 0.023, val_loss = 0.025
INFO:__main__:Epoch 5600: loss = 0.022, val_loss = 0.024

INFO:__main__:Epoch 5650: loss = 0.022, val_loss = 0.027
INFO:__main__:Epoch 5700: loss = 0.022, val_loss = 0.024
INFO:__main__:Epoch 5750: loss = 0.019, val_loss = 0.022
INFO:__main__:Epoch 5800: loss = 0.021, val_loss = 0.025
INFO:__main__:Epoch 5850: loss = 0.021, val_loss = 0.023
INFO:__main__:Epoch 5900: loss = 0.020, val_loss = 0.023
INFO:__main__:Epoch 5950: loss = 0.019, val_loss = 0.026
INFO:__main__:Epoch 6000: loss = 0.021, val_loss = 0.025
INFO:__main__:Epoch 6050: loss = 0.020, val_loss = 0.023
INFO:__main__:Epoch 6100: loss = 0.019, val_loss = 0.024
INFO:__main__:Epoch 6150: loss = 0.020, val_loss = 0.024
INFO:__main__:Epoch 6200: loss = 0.018, val_loss = 0.026
INFO:__main__:Epoch 6250: loss = 0.020, val_loss = 0.023
INFO:__main__:Epoch 6300: loss = 0.018, val_loss = 0.020
INFO:__main__:Epoch 6350: loss = 0.019, val_loss = 0.023
INFO:__main__:Epoch 6400: loss = 0.019, val_loss = 0.022
INFO:__main__:Epoch 6450: loss = 0.017, val_loss = 0.025
INFO:__main__:Epoch 6500: loss = 0.018, val_loss = 0.021
INFO:__main__:Epoch 6550: loss = 0.019, val_loss = 0.022
INFO:__main__:Epoch 6600: loss = 0.018, val_loss = 0.020
INFO:__main__:Epoch 6650: loss = 0.017, val_loss = 0.023
INFO:__main__:Epoch 6700: loss = 0.018, val_loss = 0.022
INFO:__main__:Epoch 6750: loss = 0.019, val_loss = 0.019
INFO:__main__:Epoch 6800: loss = 0.018, val_loss = 0.024
INFO:__main__:Epoch 6850: loss = 0.018, val_loss = 0.022
INFO:__main__:Epoch 6900: loss = 0.016, val_loss = 0.021
INFO:__main__:Epoch 6950: loss = 0.017, val_loss = 0.022
INFO:__main__:Epoch 7000: loss = 0.016, val_loss = 0.021
INFO:__main__:Epoch 7050: loss = 0.016, val_loss = 0.021
INFO:__main__:Epoch 7100: loss = 0.016, val_loss = 0.022
INFO:__main__:Epoch 7150: loss = 0.016, val_loss = 0.025
INFO:__main__:Epoch 7200: loss = 0.016, val_loss = 0.021
INFO:__main__:Epoch 7250: loss = 0.016, val_loss = 0.020
INFO:__main__:Epoch 7300: loss = 0.016, val_loss = 0.020
INFO:__main__:Epoch 7350: loss = 0.016, val_loss = 0.022
INFO:__main__:Epoch 7400: loss = 0.015, val_loss = 0.020
INFO:__main__:Epoch 7450: loss = 0.015, val_loss = 0.018
INFO:__main__:Epoch 7500: loss = 0.016, val_loss = 0.020
INFO:__main__:Epoch 7550: loss = 0.015, val_loss = 0.021
INFO:__main__:Epoch 7600: loss = 0.016, val_loss = 0.016
INFO:__main__:Epoch 7650: loss = 0.016, val_loss = 0.020
INFO:__main__:Epoch 7700: loss = 0.015, val_loss = 0.021
INFO:__main__:Epoch 7750: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 7800: loss = 0.015, val_loss = 0.015
INFO:__main__:Epoch 7850: loss = 0.013, val_loss = 0.017
INFO:__main__:Epoch 7900: loss = 0.016, val_loss = 0.016
INFO:__main__:Epoch 7950: loss = 0.014, val_loss = 0.020
INFO:__main__:Epoch 8000: loss = 0.014, val_loss = 0.017

INFO:__main__:Epoch 8050: loss = 0.014, val_loss = 0.022
INFO:__main__:Epoch 8100: loss = 0.014, val_loss = 0.019
INFO:__main__:Epoch 8150: loss = 0.014, val_loss = 0.020
INFO:__main__:Epoch 8200: loss = 0.014, val_loss = 0.020
INFO:__main__:Epoch 8250: loss = 0.014, val_loss = 0.020
INFO:__main__:Epoch 8300: loss = 0.013, val_loss = 0.020
INFO:__main__:Epoch 8350: loss = 0.014, val_loss = 0.017
INFO:__main__:Epoch 8400: loss = 0.013, val_loss = 0.018
INFO:__main__:Epoch 8450: loss = 0.014, val_loss = 0.019
INFO:__main__:Epoch 8500: loss = 0.013, val_loss = 0.019
INFO:__main__:Epoch 8550: loss = 0.013, val_loss = 0.021
INFO:__main__:Epoch 8600: loss = 0.014, val_loss = 0.018
INFO:__main__:Epoch 8650: loss = 0.013, val_loss = 0.021
INFO:__main__:Epoch 8700: loss = 0.013, val_loss = 0.018
INFO:__main__:Epoch 8750: loss = 0.013, val_loss = 0.019
INFO:__main__:Epoch 8800: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 8850: loss = 0.013, val_loss = 0.018
INFO:__main__:Epoch 8900: loss = 0.012, val_loss = 0.018
INFO:__main__:Epoch 8950: loss = 0.013, val_loss = 0.019
INFO:__main__:Epoch 9000: loss = 0.013, val_loss = 0.015
INFO:__main__:Epoch 9050: loss = 0.013, val_loss = 0.018
INFO:__main__:Epoch 9100: loss = 0.012, val_loss = 0.017
INFO:__main__:Epoch 9150: loss = 0.012, val_loss = 0.016
INFO:__main__:Epoch 9200: loss = 0.012, val_loss = 0.021
INFO:__main__:Epoch 9250: loss = 0.012, val_loss = 0.018
INFO:__main__:Epoch 9300: loss = 0.012, val_loss = 0.017
INFO:__main__:Epoch 9350: loss = 0.012, val_loss = 0.016
INFO:__main__:Epoch 9400: loss = 0.012, val_loss = 0.017
INFO:__main__:Epoch 9450: loss = 0.012, val_loss = 0.016
INFO:__main__:Epoch 9500: loss = 0.011, val_loss = 0.020
INFO:__main__:Epoch 9550: loss = 0.012, val_loss = 0.017
INFO:__main__:Epoch 9600: loss = 0.012, val_loss = 0.018
INFO:__main__:Epoch 9650: loss = 0.012, val_loss = 0.018
INFO:__main__:Epoch 9700: loss = 0.011, val_loss = 0.019
INFO:__main__:Epoch 9750: loss = 0.012, val_loss = 0.017
INFO:__main__:Epoch 9800: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 9850: loss = 0.011, val_loss = 0.015
INFO:__main__:Epoch 9900: loss = 0.012, val_loss = 0.018
INFO:__main__:Epoch 9950: loss = 0.012, val_loss = 0.018
Accuracy: 96.11%

1.8 Plot Loss
[78]: def smooth_curve(points, factor=0.9):
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points

smooth_train_loss = smooth_curve(trainer.train_loss)
smooth_val_loss = smooth_curve(trainer.val_loss)

plt.plot(smooth_train_loss, label='Smooth Train Loss')
plt.plot(smooth_val_loss, label='Smooth Val Loss')
plt.title('Smooth Train and Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
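
For reference, smooth_curve is an exponential moving average: with factor = 0.9, each smoothed point is s_t = 0.9 * s_(t-1) + 0.1 * x_t (with s_0 = x_0), so every point blends the current loss with an exponentially decaying memory of earlier epochs. That is why the plotted curves look far less noisy than the raw per-epoch losses.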

1.9 Fine-Tune NN
[ ]: def objective(trial):
    # Define hyperparameters
    n_layers = trial.suggest_int('n_layers', 1, 10)
    # n_layers = 1
    hidden_sizes = [trial.suggest_int(f'hidden_size_{i}', 32, 128) for i in range(n_layers)]
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)  # single dropout rate for all layers
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1e-1)
    # Note: newer Optuna versions deprecate suggest_uniform/suggest_loguniform
    # in favor of trial.suggest_float(..., log=True).
    # epochs = trial.suggest_int('epochs', 500, 10000)
    epochs = 10000
    init_method = trial.suggest_categorical('init_method', ['glorot_uniform', 'glorot_normal', 'he_uniform', 'he_normal', 'random'])
    clip_type = trial.suggest_categorical('clip_type', ['value', 'norm'])
    clip_value = trial.suggest_uniform('clip_value', 0.0, 1.0)

    layers = [input_size] + hidden_sizes + [output_size]

    # Create and train the neural network
    nn = NeuralNetwork(layers=layers, loss_func=loss_func, dropout_rate=dropout_rate, init_method=init_method, clip_type=clip_type, grad_clip=clip_value)
    trainer = Trainer(nn, loss_func)
    trainer.train(X_train, y_train, X_test, y_test, epochs, learning_rate, early_stopping=False)

    # Evaluate the performance of the neural network (dropout disabled)
    predictions = np.argmax(nn.forward(X_test, is_training=False), axis=1)
    accuracy = np.mean(predictions == y_test_labels)

    return accuracy

# Create a study object and optimize the objective function
study = optuna.create_study(study_name='nn_study', direction='maximize')
study.optimize(objective, n_trials=100)

# Print the best hyperparameters
print(f"Best trial: {study.best_trial.params}")
print(f"Best value: {study.best_trial.value:.3f}")

1.10 Predict
[91]: best_trial = {'n_layers': 1, 'hidden_size_0': 33, 'dropout_rate': 0.001524880086886879, 'learning_rate': 0.09916060658342357, 'init_method': 'glorot_normal', 'clip_type': 'norm', 'clip_value': 0.9478771616277659}

best_value = 0.978

[92]: epochs = 20000
best_nn = NeuralNetwork(layers=[input_size, study.best_trial.params['hidden_size_0'], output_size],
                        init_method=study.best_trial.params['init_method'],
                        loss_func=loss_func,
                        dropout_rate=study.best_trial.params['dropout_rate'],
                        clip_type=study.best_trial.params['clip_type'],
                        grad_clip=study.best_trial.params['clip_value'])
best_trainer = Trainer(best_nn, loss_func)
best_trainer.train(X_train, y_train, X_test, y_test, epochs, study.best_trial.params['learning_rate'], early_stopping=False)

# Evaluate the performance of the best neural network (dropout disabled)
predictions = np.argmax(best_nn.forward(X_test, is_training=False), axis=1)
accuracy = np.mean(predictions == y_test_labels)
print(f"Best accuracy: {accuracy:.2%}")

INFO:__main__:Epoch 0: loss = 0.083, val_loss = 0.093


INFO:__main__:Epoch 50: loss = 0.221, val_loss = 0.221
INFO:__main__:Epoch 100: loss = 0.211, val_loss = 0.211
INFO:__main__:Epoch 150: loss = 0.199, val_loss = 0.199
INFO:__main__:Epoch 200: loss = 0.184, val_loss = 0.184
INFO:__main__:Epoch 250: loss = 0.167, val_loss = 0.167
INFO:__main__:Epoch 300: loss = 0.150, val_loss = 0.151
INFO:__main__:Epoch 350: loss = 0.135, val_loss = 0.135
INFO:__main__:Epoch 400: loss = 0.121, val_loss = 0.122
INFO:__main__:Epoch 450: loss = 0.110, val_loss = 0.110
INFO:__main__:Epoch 500: loss = 0.100, val_loss = 0.100
INFO:__main__:Epoch 550: loss = 0.092, val_loss = 0.092
INFO:__main__:Epoch 600: loss = 0.086, val_loss = 0.085
INFO:__main__:Epoch 650: loss = 0.080, val_loss = 0.079
INFO:__main__:Epoch 700: loss = 0.075, val_loss = 0.074
INFO:__main__:Epoch 750: loss = 0.070, val_loss = 0.070
INFO:__main__:Epoch 800: loss = 0.067, val_loss = 0.066
INFO:__main__:Epoch 850: loss = 0.063, val_loss = 0.062
INFO:__main__:Epoch 900: loss = 0.060, val_loss = 0.059
INFO:__main__:Epoch 950: loss = 0.057, val_loss = 0.056
INFO:__main__:Epoch 1000: loss = 0.055, val_loss = 0.053
INFO:__main__:Epoch 1050: loss = 0.052, val_loss = 0.051
INFO:__main__:Epoch 1100: loss = 0.050, val_loss = 0.050
INFO:__main__:Epoch 1150: loss = 0.048, val_loss = 0.048
INFO:__main__:Epoch 1200: loss = 0.046, val_loss = 0.046
INFO:__main__:Epoch 1250: loss = 0.045, val_loss = 0.044
INFO:__main__:Epoch 1300: loss = 0.043, val_loss = 0.043
INFO:__main__:Epoch 1350: loss = 0.042, val_loss = 0.041
INFO:__main__:Epoch 1400: loss = 0.041, val_loss = 0.040
INFO:__main__:Epoch 1450: loss = 0.039, val_loss = 0.039

INFO:__main__:Epoch 1500: loss = 0.038, val_loss = 0.038
INFO:__main__:Epoch 1550: loss = 0.037, val_loss = 0.037
INFO:__main__:Epoch 1600: loss = 0.036, val_loss = 0.036
INFO:__main__:Epoch 1650: loss = 0.035, val_loss = 0.035
INFO:__main__:Epoch 1700: loss = 0.034, val_loss = 0.034
INFO:__main__:Epoch 1750: loss = 0.034, val_loss = 0.033
INFO:__main__:Epoch 1800: loss = 0.033, val_loss = 0.032
INFO:__main__:Epoch 1850: loss = 0.032, val_loss = 0.032
INFO:__main__:Epoch 1900: loss = 0.031, val_loss = 0.031
INFO:__main__:Epoch 1950: loss = 0.031, val_loss = 0.031
INFO:__main__:Epoch 2000: loss = 0.030, val_loss = 0.030
INFO:__main__:Epoch 2050: loss = 0.029, val_loss = 0.029
INFO:__main__:Epoch 2100: loss = 0.029, val_loss = 0.029
INFO:__main__:Epoch 2150: loss = 0.028, val_loss = 0.028
INFO:__main__:Epoch 2200: loss = 0.028, val_loss = 0.028
INFO:__main__:Epoch 2250: loss = 0.027, val_loss = 0.027
INFO:__main__:Epoch 2300: loss = 0.027, val_loss = 0.027
INFO:__main__:Epoch 2350: loss = 0.027, val_loss = 0.027
INFO:__main__:Epoch 2400: loss = 0.026, val_loss = 0.026
INFO:__main__:Epoch 2450: loss = 0.026, val_loss = 0.026
INFO:__main__:Epoch 2500: loss = 0.025, val_loss = 0.025
INFO:__main__:Epoch 2550: loss = 0.025, val_loss = 0.025
INFO:__main__:Epoch 2600: loss = 0.025, val_loss = 0.025
INFO:__main__:Epoch 2650: loss = 0.024, val_loss = 0.024
INFO:__main__:Epoch 2700: loss = 0.024, val_loss = 0.024
INFO:__main__:Epoch 2750: loss = 0.024, val_loss = 0.024
INFO:__main__:Epoch 2800: loss = 0.023, val_loss = 0.023
INFO:__main__:Epoch 2850: loss = 0.023, val_loss = 0.023
INFO:__main__:Epoch 2900: loss = 0.023, val_loss = 0.023
INFO:__main__:Epoch 2950: loss = 0.022, val_loss = 0.023
INFO:__main__:Epoch 3000: loss = 0.022, val_loss = 0.023
INFO:__main__:Epoch 3050: loss = 0.022, val_loss = 0.022
INFO:__main__:Epoch 3100: loss = 0.022, val_loss = 0.022
INFO:__main__:Epoch 3150: loss = 0.021, val_loss = 0.022
INFO:__main__:Epoch 3200: loss = 0.021, val_loss = 0.022
INFO:__main__:Epoch 3250: loss = 0.021, val_loss = 0.022
INFO:__main__:Epoch 3300: loss = 0.021, val_loss = 0.021
INFO:__main__:Epoch 3350: loss = 0.020, val_loss = 0.021
INFO:__main__:Epoch 3400: loss = 0.020, val_loss = 0.021
INFO:__main__:Epoch 3450: loss = 0.020, val_loss = 0.021
INFO:__main__:Epoch 3500: loss = 0.019, val_loss = 0.020
INFO:__main__:Epoch 3550: loss = 0.019, val_loss = 0.020
INFO:__main__:Epoch 3600: loss = 0.019, val_loss = 0.020
INFO:__main__:Epoch 3650: loss = 0.019, val_loss = 0.020
INFO:__main__:Epoch 3700: loss = 0.019, val_loss = 0.020
INFO:__main__:Epoch 3750: loss = 0.019, val_loss = 0.019
INFO:__main__:Epoch 3800: loss = 0.018, val_loss = 0.019
INFO:__main__:Epoch 3850: loss = 0.018, val_loss = 0.019

INFO:__main__:Epoch 3900: loss = 0.018, val_loss = 0.019
INFO:__main__:Epoch 3950: loss = 0.018, val_loss = 0.019
INFO:__main__:Epoch 4000: loss = 0.018, val_loss = 0.019
INFO:__main__:Epoch 4050: loss = 0.018, val_loss = 0.019
INFO:__main__:Epoch 4100: loss = 0.017, val_loss = 0.018
INFO:__main__:Epoch 4150: loss = 0.017, val_loss = 0.019
INFO:__main__:Epoch 4200: loss = 0.017, val_loss = 0.019
INFO:__main__:Epoch 4250: loss = 0.017, val_loss = 0.018
INFO:__main__:Epoch 4300: loss = 0.017, val_loss = 0.018
INFO:__main__:Epoch 4350: loss = 0.017, val_loss = 0.018
INFO:__main__:Epoch 4400: loss = 0.017, val_loss = 0.018
INFO:__main__:Epoch 4450: loss = 0.016, val_loss = 0.018
INFO:__main__:Epoch 4500: loss = 0.016, val_loss = 0.018
INFO:__main__:Epoch 4550: loss = 0.016, val_loss = 0.018
INFO:__main__:Epoch 4600: loss = 0.016, val_loss = 0.017
INFO:__main__:Epoch 4650: loss = 0.016, val_loss = 0.018
INFO:__main__:Epoch 4700: loss = 0.016, val_loss = 0.018
INFO:__main__:Epoch 4750: loss = 0.016, val_loss = 0.017
INFO:__main__:Epoch 4800: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 4850: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 4900: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 4950: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 5000: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 5050: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 5100: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 5150: loss = 0.015, val_loss = 0.017
INFO:__main__:Epoch 5200: loss = 0.014, val_loss = 0.017
INFO:__main__:Epoch 5250: loss = 0.015, val_loss = 0.016
INFO:__main__:Epoch 5300: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5350: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5400: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5450: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5500: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5550: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5600: loss = 0.014, val_loss = 0.016
INFO:__main__:Epoch 5650: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 5700: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 5750: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 5800: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 5850: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 5900: loss = 0.013, val_loss = 0.015
INFO:__main__:Epoch 5950: loss = 0.013, val_loss = 0.015
INFO:__main__:Epoch 6000: loss = 0.013, val_loss = 0.017
INFO:__main__:Epoch 6050: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 6100: loss = 0.013, val_loss = 0.016
INFO:__main__:Epoch 6150: loss = 0.013, val_loss = 0.015
INFO:__main__:Epoch 6200: loss = 0.013, val_loss = 0.015
INFO:__main__:Epoch 6250: loss = 0.012, val_loss = 0.015

INFO:__main__:Epoch 6300: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6350: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6400: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6450: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6500: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6550: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6600: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6650: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6700: loss = 0.011, val_loss = 0.016
INFO:__main__:Epoch 6750: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6800: loss = 0.011, val_loss = 0.015
INFO:__main__:Epoch 6850: loss = 0.012, val_loss = 0.015
INFO:__main__:Epoch 6900: loss = 0.011, val_loss = 0.015
INFO:__main__:Epoch 6950: loss = 0.011, val_loss = 0.015
INFO:__main__:Epoch 7000: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7050: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7100: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7150: loss = 0.011, val_loss = 0.015
INFO:__main__:Epoch 7200: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7250: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7300: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7350: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7400: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7450: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7500: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7550: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7600: loss = 0.011, val_loss = 0.014
INFO:__main__:Epoch 7650: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7700: loss = 0.010, val_loss = 0.013
INFO:__main__:Epoch 7750: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7800: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7850: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7900: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 7950: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8000: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8050: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8100: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8150: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8200: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8250: loss = 0.010, val_loss = 0.014
INFO:__main__:Epoch 8300: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8350: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8400: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8450: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8500: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 8550: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8600: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8650: loss = 0.009, val_loss = 0.013

INFO:__main__:Epoch 8700: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8750: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8800: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 8850: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 8900: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 8950: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9000: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9050: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9100: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9150: loss = 0.009, val_loss = 0.014
INFO:__main__:Epoch 9200: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9250: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9300: loss = 0.009, val_loss = 0.013
INFO:__main__:Epoch 9350: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9400: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9450: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9500: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9550: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9600: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9650: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9700: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9750: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9800: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9850: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9900: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 9950: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10000: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10050: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10100: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10150: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10200: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10250: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10300: loss = 0.008, val_loss = 0.012
INFO:__main__:Epoch 10350: loss = 0.007, val_loss = 0.014
INFO:__main__:Epoch 10400: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10450: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10500: loss = 0.008, val_loss = 0.013
INFO:__main__:Epoch 10550: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10600: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10650: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10700: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10750: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10800: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10850: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10900: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 10950: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11000: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11050: loss = 0.007, val_loss = 0.013

INFO:__main__:Epoch 11100: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11150: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11200: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11250: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11300: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11350: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11400: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11450: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11500: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11550: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11600: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11650: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11700: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11750: loss = 0.007, val_loss = 0.013
INFO:__main__:Epoch 11800: loss = 0.007, val_loss = 0.012
INFO:__main__:Epoch 11850: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 11900: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 11950: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12000: loss = 0.006, val_loss = 0.013
INFO:__main__:Epoch 12050: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12100: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12150: loss = 0.006, val_loss = 0.013
INFO:__main__:Epoch 12200: loss = 0.006, val_loss = 0.013
INFO:__main__:Epoch 12250: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12300: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12350: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12400: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12450: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12500: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12550: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12600: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12650: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12700: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12750: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12800: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12850: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12900: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 12950: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13000: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13050: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13100: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13150: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13200: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13250: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13300: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13350: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13400: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13450: loss = 0.006, val_loss = 0.012

INFO:__main__:Epoch 13500: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13550: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13600: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13650: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13700: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 13750: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13800: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13850: loss = 0.006, val_loss = 0.012
INFO:__main__:Epoch 13900: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 13950: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14000: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14050: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14100: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14150: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14200: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14250: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14300: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14350: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14400: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14450: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14500: loss = 0.005, val_loss = 0.013
INFO:__main__:Epoch 14550: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14600: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14650: loss = 0.005, val_loss = 0.013
INFO:__main__:Epoch 14700: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14750: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14800: loss = 0.005, val_loss = 0.013
INFO:__main__:Epoch 14850: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14900: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 14950: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15000: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15050: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15100: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15150: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15200: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15250: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15300: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15350: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15400: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15450: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15500: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15550: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15600: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15650: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15700: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15750: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15800: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15850: loss = 0.005, val_loss = 0.012

INFO:__main__:Epoch 15900: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 15950: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 16000: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 16050: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 16100: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 16150: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16200: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16250: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16300: loss = 0.005, val_loss = 0.012
INFO:__main__:Epoch 16350: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16400: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16450: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16500: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16550: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16600: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16650: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16700: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16750: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16800: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16850: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16900: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 16950: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17000: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17050: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17100: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17150: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17200: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17250: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17300: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17350: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17400: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17450: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17500: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17550: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17600: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17650: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17700: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17750: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17800: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17850: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17900: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 17950: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18000: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18050: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18100: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18150: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18200: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18250: loss = 0.004, val_loss = 0.012

INFO:__main__:Epoch 18300: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18350: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18400: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18450: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18500: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18550: loss = 0.004, val_loss = 0.011
INFO:__main__:Epoch 18600: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18650: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18700: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18750: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18800: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18850: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18900: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 18950: loss = 0.004, val_loss = 0.011
INFO:__main__:Epoch 19000: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19050: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19100: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19150: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19200: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19250: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19300: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19350: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19400: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19450: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19500: loss = 0.003, val_loss = 0.012
INFO:__main__:Epoch 19550: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19600: loss = 0.003, val_loss = 0.013
INFO:__main__:Epoch 19650: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19700: loss = 0.003, val_loss = 0.012
INFO:__main__:Epoch 19750: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19800: loss = 0.003, val_loss = 0.011
INFO:__main__:Epoch 19850: loss = 0.003, val_loss = 0.012
INFO:__main__:Epoch 19900: loss = 0.004, val_loss = 0.012
INFO:__main__:Epoch 19950: loss = 0.003, val_loss = 0.012
Best accuracy: 97.78%

[93]: def smooth_curve(points, factor=0.9):
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points

# Plot the losses of the fine-tuned model (best_trainer, not the earlier trainer)
smooth_train_loss = smooth_curve(best_trainer.train_loss)
smooth_val_loss = smooth_curve(best_trainer.val_loss)

plt.plot(smooth_train_loss, label='Smooth Train Loss')
plt.plot(smooth_val_loss, label='Smooth Val Loss')
plt.title('Smooth Train and Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
