
lesson2

March 16, 2024

#Chapter 4: Neural Network Training


##Sum of Squared Errors
[ ]: import numpy as np
y = [1.0,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
#y = [1.0,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
t = [0,0,1,0,0,0,0,0,0,0]

def sum_squared_error(y,t):
    return 0.5*np.sum((y-t)**2)

sqe = sum_squared_error(np.array(y), np.array(t))

print(sqe)

0.5925
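As a quick check, the commented-out alternative output above (where index 7, a wrong class, gets the highest score instead of index 2) should give a noticeably larger error. A minimal sketch reusing sum_squared_error from the cell above:

[ ]: # Hypothetical comparison: the alternative y from the commented-out line above
y_wrong = np.array([1.0,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0])
t = np.array([0,0,1,0,0,0,0,0,0,0])

print(sum_squared_error(y_wrong, t))  # approximately 1.0925, larger than 0.5925 because the correct class only scores 0.1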
##Cross-Entropy Error
[ ]: import numpy as np

def cross_entropy_error(y,t):
    delta = 1e-7
    return -np.sum(t*np.log(y+delta))

y = [1.0,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
t = [0,0,1,0,0,0,0,0,0,0]

cee = cross_entropy_error(np.array(y), np.array(t))


print(cee)

0.510825457099338
• In this example, the output for the correct label is 0.6 and the cross-entropy error is about 0.51.
##Mini-Batch Learning
In neural network training, a small subset of the training data is selected and training is carried out on each such group of data; this group is called a mini-batch.
[ ]: from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

[9]: cd /content/drive/MyDrive/GG Colab/Deep Learning/dataset

/content/drive/MyDrive/GG Colab/Deep Learning/dataset

[10]: from my_mnist import load_mnist

[ ]: import sys, os
sys.path.append(os.pardir)
import numpy as np

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=False)

print(x_train.shape)
print(t_train.shape)

(60000, 784)
(60000,)
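To form a mini-batch, a fixed number of indices can be drawn at random from the training set and used to index into the data. A minimal sketch (the batch size of 100 is an arbitrary choice for illustration):

[ ]: train_size = x_train.shape[0]
batch_size = 100

# Draw batch_size random indices from [0, train_size) and pick out those rows
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

print(x_batch.shape)  # (100, 784)
print(t_batch.shape)  # (100,)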
##Numerical Differentiation
[ ]: import numpy as np
import matplotlib.pyplot as plt

def numerical_diff(f,x):
    h = 1e-4
    return (f(x+h) - f(x-h)) / (2*h)

def function_1(x):
    return 0.01*x**2 + 0.1*x

x = np.arange(0.0,20.0, 0.1)
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.plot(x,y)
plt.show()
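As a quick check, the numerical derivative of function_1 can be compared with the analytical derivative 0.02x + 0.1. A minimal sketch reusing numerical_diff from the cell above:

[ ]: # Analytical derivative of 0.01*x**2 + 0.1*x is 0.02*x + 0.1,
# so the expected values are 0.2 at x = 5 and 0.3 at x = 10.
print(numerical_diff(function_1, 5))   # approximately 0.2
print(numerical_diff(function_1, 10))  # approximately 0.3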

##Partial Derivative
[ ]: def function_2(x):
    return x[0]**2 + x[1]**2
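To compute a single partial derivative numerically, the other variable can be held fixed so the problem reduces to a one-variable function. A minimal sketch for the point (x0, x1) = (3.0, 4.0), reusing numerical_diff from the cell above (the helper names function_tmp1 and function_tmp2 are only for illustration):

[ ]: # Partial derivative with respect to x0 at (3.0, 4.0): fix x1 = 4.0
def function_tmp1(x0):
    return x0**2 + 4.0**2

# Partial derivative with respect to x1 at (3.0, 4.0): fix x0 = 3.0
def function_tmp2(x1):
    return 3.0**2 + x1**2

print(numerical_diff(function_tmp1, 3.0))  # approximately 6.0
print(numerical_diff(function_tmp2, 4.0))  # approximately 8.0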

##Gradient
[ ]: import numpy as np

def function_2(x):
    return x[0]**2 + x[1]**2

def numericial_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)  # Returns an array of zeros with the same shape as x

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)

        x[idx] = tmp_val
    return grad

numericial_gradient(function_2, np.array([3.0, 4.0]))

[ ]: array([6., 8.])

##Gradient descent
[ ]: import numpy as np

init_x = np.array([-3.0, 4.0])

def numericial_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)  # Returns an array of zeros with the same shape as x

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)

        x[idx] = tmp_val
    return grad

def function_2(x):
    return x[0]**2 + x[1]**2

def gradient_descent(f, init_x, lr = 0.01, step_num = 100):
    x = init_x

    for i in range(step_num):
        grad = numericial_gradient(f,x)
        x -= lr * grad

    return x

gradient_descent(function_2, init_x = init_x, lr = 0.1, step_num = 100)

[ ]: array([-6.11110793e-10, 8.14814391e-10])
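The learning rate matters: if it is too large the updates overshoot and diverge, and if it is too small the parameters barely move from their starting point. A minimal sketch reusing gradient_descent from the cell above (the values 10.0 and 1e-10 are just illustrative extremes):

[ ]: # Learning rate too large: the iterates diverge to huge values
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x = init_x, lr = 10.0, step_num = 100))

# Learning rate too small: after 100 steps x has hardly moved from (-3.0, 4.0)
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x = init_x, lr = 1e-10, step_num = 100))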

##Gradients for a Neural Network


[3]: from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

[4]: cd /content/drive/MyDrive/GG Colab/Deep Learning/common

/content/drive/MyDrive/GG Colab/Deep Learning/common

[5]: from my_functions import softmax, cross_entropy_error

[6]: from my_gradient import numerical_gradient

[ ]: import sys, os
sys.path.append(os.pardir)
import numpy as np

class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2,3)

    def predict(self, x):
        return np.dot(x,self.W)

    def loss(self,x,t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y,t)

        return loss

net = simpleNet()
print(net.W)

x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)

t = np.array([0,0,1])
net.loss(x,t)

def f(W):
    return net.loss(x,t)

dW = numerical_gradient(f, net.W)

print(dW)

[[-1.02003529  0.65014502  0.34236522]
 [-0.24540338 -0.74331997  1.15400741]]
[-0.83288421 -0.27890096  1.2440258 ]
[[ 0.05597043  0.09739811 -0.15336855]
 [ 0.08395565  0.14609717 -0.23005282]]
##A Two-layer Neural Network as a Class
[17]: import sys, os
sys.path.append(os.pardir)
from my_functions import *
from my_gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y,t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis = 1)
        t = np.argmax(t, axis = 1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x,t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

net = TwoLayerNet(input_size = 784, hidden_size = 100, output_size = 10)


net.params['W1'].shape
net.params['b1'].shape
net.params['W2'].shape
net.params['b2'].shape

[17]: (10,)
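A quick way to confirm the shapes line up is to run predict on a dummy batch of inputs. A minimal sketch (the random batch of 100 fake images is only for illustration):

[ ]: import numpy as np

x = np.random.rand(100, 784)  # dummy mini-batch of 100 "images"
y = net.predict(x)
print(y.shape)  # (100, 10): one 10-class score vector per input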

##Implementing Mini-Batch Training


[ ]: import numpy as np
from my_mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label=True)

train_loss_list = []

inters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size = 784, hidden_size=50, output_size = 10)

for i in range(inters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
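The recorded losses can be plotted to check that training is actually reducing the loss. A minimal sketch reusing train_loss_list from above (note that with the numerical gradient each iteration is very slow, so the loop above can take a long time to finish):

[ ]: import matplotlib.pyplot as plt

plt.plot(np.arange(len(train_loss_list)), train_loss_list)
plt.xlabel("iteration")
plt.ylabel("loss")
plt.show()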

##Using Test Data for Evaluation


[ ]: import numpy as np
from my_mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

inters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
inter_per_epoch = max(train_size / batch_size, 1)

network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

for i in range(inters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % inter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + " , " + str(test_acc))
