Data Mining 2
Data Mining 2
Neural Network
# Data handling
import numpy as np
import pandas as pd
# Model
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
# Model fitting
# The object returned by fit() is kept in `history`; the plotting code reads
# its `.history` dict ('accuracy' / 'val_accuracy').
history = model.fit(
train_images, train_labels,
epochs=100,
# NOTE(review): presumably holds out 10% of the training arrays as the
# validation set — confirm against the Keras `fit` documentation.
validation_split=0.1
)
# Result
# Assuming 'history' is the object returned by the 'fit' method
# Plot training & validation accuracy values per epoch
plt.figure(figsize=(14, 5))
# First cell of a 1x2 grid; only this panel is drawn in this snippet.
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# The second curve is 'val_accuracy' (validation data), not the test set,
# so the legend must say 'Validation'.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
epochs: 20 으로 고정
train: 50,000
validation: 10,000
test: 10,000
# Compile the model
best_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
best_model.fit(
    x_train,
    y_train,
    epochs=20,
    validation_data=(x_val, y_val),
    verbose=1,
)

# Evaluate the model on the test data
test_loss, test_accuracy = best_model.evaluate(x_test, y_test, verbose=1)
1. Layer 수를 변경
def model_by_layers(trial):
    """Build an MLP whose number of extra hidden blocks is sampled by Optuna.

    The model takes flattened 28 * 28 inputs, starts with a 256-unit ReLU
    layer followed by dropout (rate 0.3), appends ``num_layers`` blocks of a
    128-unit ReLU layer plus dropout (rate 0.3), and ends with a 10-unit
    softmax layer.

    Args:
        trial: Optuna trial; ``num_layers`` is sampled as an int in [1, 3].

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    # Hyperparameter tuned by Optuna
    depth = trial.suggest_int('num_layers', 1, 3)

    net = Sequential()

    # Input block
    net.add(Dense(units=256, activation='relu', input_shape=(28 * 28,)))
    net.add(Dropout(rate=0.3))

    # Hidden blocks.
    # NOTE(review): the source lost its indentation; the Dropout is assumed to
    # belong to the loop body (one Dropout per hidden Dense) — confirm.
    for _ in range(depth):
        net.add(Dense(units=128, activation='relu'))
        net.add(Dropout(rate=0.3))

    # Output layer
    net.add(Dense(10, activation='softmax'))
    return net
def objective(trial):
# Create a model for this trial
model = model_by_layers(trial)
# Create a study that maximizes the objective's value and run 10 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

# Mean objective value per sampled layer count, best first.
# The chain is wrapped in parentheses: split over two bare lines, the second
# line would be parsed as a separate statement starting with a list literal.
(
    result_by_layers
    .groupby('params_num_layers')[['value']]
    .mean()
    .sort_values('value', ascending=False)
)

# Predict
# NOTE(review): `predict`, `best_trial_by_layers` and the initial `result_df`
# are defined outside this snippet — presumably `predict` returns the test
# accuracy of the best trial's model; confirm.
test_accuracy = predict(model_by_layers, best_trial_by_layers)
layer_result_df = pd.DataFrame(
    {'Hyper parameter': ['Layers'], 'Test Accuracy': [test_accuracy]}
)
# Append this experiment's row to the running results table.
result_df = pd.concat([result_df, layer_result_df], ignore_index=True)
2. Layer 당 unit 의 수를 변경
def model_by_units(trial):
# Hyperparameters to be tuned by Optuna
return model
def objective(trial):
# Create a study that maximizes the objective's value and run 15 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=15)

# Mean objective value per sampled unit count, best first.
# The chain is wrapped in parentheses: split over two bare lines, the second
# line would be parsed as a separate statement starting with a list literal.
(
    result_by_units
    .groupby('params_num_units')[['value']]
    .mean()
    .sort_values('value', ascending=False)
)

# Predict
# NOTE(review): `predict`, `best_trial_by_units` and the initial `result_df`
# are defined outside this snippet — presumably `predict` returns the test
# accuracy of the best trial's model; confirm.
test_accuracy = predict(model_by_units, best_trial_by_units)
unit_result_df = pd.DataFrame(
    {'Hyper parameter': ['Units'], 'Test Accuracy': [test_accuracy]}
)
# Append this experiment's row to the running results table.
result_df = pd.concat([result_df, unit_result_df], ignore_index=True)
Hidden layer 2 개 층 (256, 128) 로 고정하고 각 층에서 [0, 0.1, 0.2, 0.3,
0.4, 0.5] dropout 변경하며 성능 확인
def model_by_dropout(trial):
    """Build a fixed 256/128 MLP whose dropout rate is sampled by Optuna.

    The same sampled rate is applied after both hidden layers. Inputs are
    flattened 28 * 28 vectors and the output is a 10-unit softmax layer.

    Args:
        trial: Optuna trial; ``dropout_rate`` is chosen from
            [0, 0.1, 0.2, 0.3, 0.4, 0.5].

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    # Hyperparameter tuned by Optuna
    rate_choices = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
    drop = trial.suggest_categorical('dropout_rate', rate_choices)

    net = Sequential()

    # Hidden layers, each followed by dropout at the sampled rate
    net.add(Dense(units=256, activation='relu', input_shape=(28 * 28,)))
    net.add(Dropout(rate=drop))
    net.add(Dense(units=128, activation='relu'))
    net.add(Dropout(rate=drop))

    # Output layer
    net.add(Dense(10, activation='softmax'))
    return net
def objective(trial):
# Create a study that maximizes the objective's value, then run 20 trials
# of `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
드롭아웃 비율을 탐색
# Mean objective value per sampled dropout rate, best first.
# The chain is wrapped in parentheses so it is one expression across lines,
# and the closing parenthesis of sort_values() is restored.
(
    result_by_dropouts
    .groupby('params_dropout_rate')[['value']]
    .mean()
    .sort_values('value', ascending=False)
)

# Predict
# NOTE(review): `predict`, `best_trial_by_dropouts` and the initial
# `result_df` are defined outside this snippet — presumably `predict` returns
# the test accuracy of the best trial's model; confirm.
test_accuracy = predict(model_by_dropout, best_trial_by_dropouts)
dropout_result_df = pd.DataFrame(
    {'Hyper parameter': ['Dropouts'], 'Test Accuracy': [test_accuracy]}
)
# Append this experiment's row to the running results table.
result_df = pd.concat([result_df, dropout_result_df], ignore_index=True)
Hidden layer 2 개 층 (256, 128) 로 고정하고 batch size 를 [16, 32, 64,
128]로 변경
def model_by_batch(trial):
    """Build the fixed 256/128 MLP used for the batch-size experiment.

    Nothing is sampled here: ``trial`` is accepted but not read inside this
    function, so every call returns the same architecture (dropout 0.3 after
    each hidden layer, 10-unit softmax output).

    Args:
        trial: Optuna trial (unused in this function).

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    net = Sequential()

    # Hidden layers
    net.add(Dense(units=256, activation='relu', input_shape=(28 * 28,)))
    net.add(Dropout(rate=0.3))
    net.add(Dense(units=128, activation='relu'))
    net.add(Dropout(rate=0.3))

    # Output layer
    net.add(Dense(10, activation='softmax'))
    return net
def objective(trial):
# Create the fixed-architecture model for this trial
model = model_by_batch(trial)
# Predict
# NOTE(review): `predict`, `best_trial_by_batch` and the initial `result_df`
# are defined outside this snippet — presumably `predict` returns the test
# accuracy of the best trial's model; confirm.
test_accuracy = predict(model_by_batch, best_trial_by_batch)
# 'Batch size' is kept on one line: a string literal cannot span a line break.
batch_result_df = pd.DataFrame(
    {'Hyper parameter': ['Batch size'], 'Test Accuracy': [test_accuracy]}
)
# Append this experiment's row to the running results table.
result_df = pd.concat([result_df, batch_result_df], ignore_index=True)
def model_by_all(trial):
    """Build an MLP with depth, dropout rate and layer widths sampled jointly.

    Sampled values, in this order: ``num_layers`` (int, 1-3),
    ``dropout_rate`` (float, 0.0-0.5), ``num_units`` (one of 64/128/256/512),
    ``units_layer_0`` (int, 256-512) and then one ``units_layer_{k}``
    (int, 32-128) per additional hidden layer, k = 1..num_layers.

    Args:
        trial: Optuna trial used for all of the samples above.

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    # Hyperparameters tuned by Optuna
    num_layers = trial.suggest_int('num_layers', 1, 3)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    # NOTE(review): 'num_units' is sampled but never used below; the layer
    # widths come from the per-layer 'units_layer_*' samples instead. Kept so
    # the recorded trial parameters stay the same — confirm whether intended.
    num_units = trial.suggest_categorical('num_units', [64, 128, 256, 512])

    net = Sequential()

    # First hidden layer (takes the flattened 28 * 28 input)
    first_width = trial.suggest_int('units_layer_0', 256, 512)
    net.add(
        Dense(units=first_width, activation='relu', input_shape=(28 * 28,))
    )
    net.add(Dropout(rate=dropout_rate))

    # Additional hidden layers, numbered from 1.
    # NOTE(review): the source lost its indentation; the Dropout is assumed to
    # belong to the loop body — confirm.
    for layer_no in range(1, num_layers + 1):
        width = trial.suggest_int(f'units_layer_{layer_no}', 32, 128)
        net.add(Dense(units=width, activation='relu'))
        net.add(Dropout(rate=dropout_rate))

    # Output layer
    net.add(Dense(10, activation='softmax'))
    return net
def objective(trial):
# Visualization: bar chart of test accuracy per tuning method, best first
tmp_df = result_df.sort_values('Test Accuracy', ascending=False)
plt.figure(figsize=(8, 6))
plt.bar(tmp_df['Hyper parameter'], tmp_df['Test Accuracy'], color='navy')
plt.xlabel('Hyper parameter Tuning Method')
plt.ylabel('Test Accuracy')
# The title literal is kept on one line: a string literal cannot span a
# line break.
plt.title(
    'Test Accuracy for Different Hyperparameter Tuning Methods',
    size=16,
)
plt.ylim(0.98, 0.985)  # Set the y-axis range
plt.show()
시각화로 결과를 나타내었다. 모든 Test Accuracy 가 비슷비슷하긴
하지만, All 이 0.982 에 가까운 가장 좋은 성능을 가진 것을 확인할 수
있다.
Bonus Q1
CNN 을 통한 MNIST 구현
# MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convolutional layers
# NOTE(review): no Flatten/Dense classifier layers appear in this snippet —
# presumably they follow in the full notebook; confirm.
model = models.Sequential()
model.add(
    layers.Conv2D(
        32,
        (3, 3),
        activation='relu',
        input_shape=(28, 28, 1),  # grayscale images, so a single channel
    )
)
model.add(layers.MaxPooling2D((2, 2)))  # pooling
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # pooling
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# Compile the model
# rmsprop: an algorithm that applies an exponential moving average so that
# recent values have a larger influence and older values a much smaller one.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=64,
    validation_data=(x_val, y_val),
)
tmp_df

# Visualize the results table: keep only the two rows with the highest
# test accuracy.
tmp_df = result_df.sort_values('Test Accuracy', ascending=False).iloc[:2,]

plt.figure(figsize=(6, 6))
plt.bar(
    tmp_df['Hyper parameter'],
    tmp_df['Test Accuracy'],
    color='navy',
)
plt.xlabel('Hyper parameter Tuning Method')
plt.ylabel('Test Accuracy')
plt.title('Test Accuracy for Different Model', size=16)
plt.ylim(0.96, 1)  # Set the y-axis range
plt.show()
딥러닝 용어
출처: https://www.slideshare.net/w0ong/ss-82372826
batch size: 학습할 때 샘플을 나누는 단위 (= mini batch)
iteration: batch_size 로 나눠진 샘플에 대해 학습하는 횟수 (파라미터가 업데이트되는 단위)
Example)
train data: 54,000 개
epochs: 100
batch size: 32
라고 한다면 전체 데이터에 대해 100 번 학습하게 되는 것이고, 1 epoch
학습에 필요한 iteration 수는 $\lceil 54{,}000 / 32 \rceil = 1{,}688$ 번이며,
실제로 아래와 같이 코드를 실행할 때 확인 가능하다.
CNN
이미지를 Neural Network 로 처리할 때의 문제점?
→ Flatten 과정에서 픽셀 간의 상호 관계가 깨져버림!
→ 배경까지 학습하여, overfitting 의 가능성 증가
CNN 의 배경
🗣
“인간이 이미지를 인식하듯이, 이미지의 패턴, 특징을 추출해서 모델을
학습시키자”
Convolution Layer Fully connected Layer
Padding
Pooling