0% found this document useful (0 votes)
23 views7 pages

Hyperparameter Tuning

Python tuning

Uploaded by

dharam
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
23 views7 pages

Hyperparameter Tuning

Python tuning

Uploaded by

dharam
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

HYPERPARAMETER TUNING

Hyperparameter tuning is the process of finding the best set of
hyperparameters for a machine learning model.

Types: Random Search, Grid Search, Genetic Algorithms, Bayesian
Optimization, etc. Here we consider only two of them: manual search
and the GridSearchCV technique.

Hyperparameter Tuning for one model


# Import necessary libraries
import warnings

import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
# The multi-name import is parenthesised so it can span several lines;
# a bare line break after the comma is a SyntaxError.
# RandomizedSearchCV is not used in the code shown here but is kept available.
from sklearn.model_selection import (
    GridSearchCV,
    RandomizedSearchCV,
    train_test_split,
)
from sklearn.svm import SVC

# Ignore all warnings (blanket filter: nothing is reported after this point)
warnings.simplefilter("ignore")

# Fetch the bundled Iris dataset
iris = datasets.load_iris()

# Build a DataFrame from the feature matrix (one named column per feature)
# and append the class labels as a 'target' column in the same step
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    target=iris.target
)

# Preview: heading line followed by the first five rows
print("First few rows of the Iris dataset:", iris_df.head(), sep="\n")

First few rows of the Iris dataset:


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2

   target
0       0
1       0
2       0
3       0
4       0

# Feature matrix and target vector, taken straight from the loaded dataset
X, y = iris.data, iris.target

# Hold out 60% of the samples for testing (test_size=0.6); the fixed
# random_state makes the split repeatable from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.6,
    random_state=42,
)

Manual Search
# Manual search: hand-pick one hyperparameter combination for the SVM
model = SVC(
    kernel='rbf',
    C=100,
    gamma=10,
)

# Train on the training split (as the last expression of a notebook cell,
# this also displays the fitted estimator)
model.fit(X=X_train, y=y_train)

SVC(C=100, gamma=10)

# Predict a class label for every held-out sample, then display the array
y_predict = model.predict(X=X_test)
y_predict

array([1, 2, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0,
2,
0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 1, 0, 0, 2, 1, 0, 0, 2, 2, 1, 1,
0,
0, 1, 1, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 1,
0,
1, 2, 0, 1, 2, 2, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2,
0,
0, 2])

# Fraction of held-out samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
accuracy
0.9

GridSearchCV
# Candidate values to search: 4 C values x 3 kernels x 4 gamma values
param_grid_gridsearch = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly'],
    gamma=[0.01, 0.1, 1, 'auto'],
)

# Fresh, unconfigured SVM for the search to clone and tune
model_gridsearch = SVC()

# Score every combination in the grid with 5-fold cross-validated accuracy
# on the training split only
grid_search = GridSearchCV(
    model_gridsearch,
    param_grid=param_grid_gridsearch,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
param_grid={'C': [0.1, 1, 10, 100],
'gamma': [0.01, 0.1, 1, 'auto'],
'kernel': ['linear', 'rbf', 'poly']},
scoring='accuracy')

# Parameter combination with the highest cross-validated score
best_params_grid = grid_search.best_params_

# Report it: heading line followed by the parameter dict
print("Optimal Hyperparameters from GridSearchCV:", best_params_grid, sep="\n")

Optimal Hyperparameters from GridSearchCV:


{'C': 10, 'gamma': 0.01, 'kernel': 'linear'}

# Retrieve the winning estimator. With GridSearchCV's default refit=True it
# has already been retrained on the whole training split, so no extra fit
# call is needed here.
best_model_grid = grid_search.best_estimator_

# Predict the held-out samples with the tuned model
y_pred_grid = best_model_grid.predict(X=X_test)

# Test-set accuracy of the tuned model (displayed as the cell result)
accuracy_grid = accuracy_score(y_true=y_test, y_pred=y_pred_grid)
accuracy_grid

0.9777777777777777
Hyperparameter Tuning for Multiple Models

Manual Search
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Model 1: SVM with a hand-picked configuration
model1 = SVC(
    kernel='linear',
    C=0.1,
    gamma=0.01,
)

# Train on the training split
model1.fit(X=X_train, y=y_train)

SVC(C=0.1, gamma=0.01, kernel='linear')

# Class labels predicted by the SVM for the held-out samples
y1_predict = model1.predict(X=X_test)
y1_predict

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0,
2,
0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1,
0,
0, 1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1,
0,
1, 2, 0, 1, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 2,
0,
0, 1])

# Test-set accuracy of the SVM
accuracy1 = accuracy_score(y_true=y_test, y_pred=y1_predict)
accuracy1

0.9777777777777777

# Model 2: random forest with a hand-picked configuration.
# No random_state is passed, so the fitted forest (and its score) may differ
# slightly from run to run.
model2 = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=2,
)

# Train on the training split
model2.fit(X=X_train, y=y_train)

RandomForestClassifier(max_depth=10, n_estimators=50)
# Class labels predicted by the random forest for the held-out samples
y2_predict = model2.predict(X=X_test)
y2_predict

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0,
2,
0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1,
0,
0, 1, 1, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1,
0,
1, 2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 2,
0,
0, 2])

# Test-set accuracy of the random forest
accuracy2 = accuracy_score(y_true=y_test, y_pred=y2_predict)
accuracy2

0.9666666666666667

# Model 3: L1-penalised logistic regression with a hand-picked configuration
model3 = LogisticRegression(
    penalty='l1',
    C=0.1,
    solver='liblinear',
)

# Train on the training split
model3.fit(X=X_train, y=y_train)

LogisticRegression(C=0.1, penalty='l1', solver='liblinear')

# Class labels predicted by logistic regression for the held-out test set
y3_predict = model3.predict(X=X_test)
y3_predict

array([2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0,
2,
0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 2, 2, 2,
0,
0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2,
0,
2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2,
0,
0, 2])

# Test-set accuracy of logistic regression
accuracy3 = accuracy_score(y_true=y_test, y_pred=y3_predict)
accuracy3

0.6777777777777778
Using GridSearchCV
# One default-configured estimator per display name; the names double as
# lookup keys for the per-model hyperparameter grids
models = {
    label: estimator_cls()
    for label, estimator_cls in (
        ('SVM', SVC),
        ('Random Forest', RandomForestClassifier),
        ('Logistic Regression', LogisticRegression),
    )
}

# Search space for each model, keyed by the same names used in `models`
param_grid = {
    'SVM': {
        'C': [0.1, 1, 10, 100],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': [0.01, 0.1, 1, 'auto'],
    },
    'Random Forest': {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
    },
    'Logistic Regression': {
        'C': [0.1, 1, 10, 100],
        'penalty': ['l1', 'l2'],
        # Only one solver is offered, so it is fixed rather than searched
        'solver': ['liblinear'],
    },
}

# Re-apply the blanket warning filter before the multi-model grid search.
# This repeats the import and filter call made in the initial setup cell.
import warnings
# Ignore all warnings
warnings.simplefilter("ignore")

# Perform GridSearchCV for each model, keeping the best estimator of each
# under the model's display name
best_models = {}

for name, model in models.items():
    # 5-fold cross-validated accuracy over this model's own parameter grid,
    # evaluated on the training split only
    grid_search = GridSearchCV(
        model,
        param_grid=param_grid[name],
        scoring='accuracy',
        cv=5,
    )
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search.best_estimator_

    # Print optimal hyperparameters for each model (kept on one line: a
    # single-quoted f-string cannot contain a raw line break)
    print(f"{name} - Optimal Hyperparameters: {grid_search.best_params_}")

SVM - Optimal Hyperparameters: {'C': 10, 'gamma': 0.01, 'kernel':


'linear'}
Random Forest - Optimal Hyperparameters: {'max_depth': None,
'min_samples_split': 10, 'n_estimators': 50}
Logistic Regression - Optimal Hyperparameters: {'C': 10, 'penalty':
'l2', 'solver': 'liblinear'}

# Evaluate best models on the test set: one accuracy line per tuned model
for name, model in best_models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} - Test Accuracy: {accuracy}")
SVM - Test Accuracy: 0.9777777777777777
Random Forest - Test Accuracy: 0.9666666666666667
Logistic Regression - Test Accuracy: 0.9555555555555556

Thank You

Name: Clement Asare

Email: [email protected]

ORCID: 0009-0000-2684-7611

YouTube: https://bit.ly/GmathStats

You might also like