
Simulating and Predicting Conversion Efficiency in Cs₂CuBiCl₆-Based Perovskite Cells: A Machine Learning Approach
Nikhil Shrivastav, Jaya Madan and Rahul Pandey*

VLSI Centre of Excellence, Chitkara University Institute of Engineering and Technology, Chitkara University,
Punjab, India
*Corresponding author

[email protected] and [email protected]

Contents
Random Forest
XGBoost

Random Forest-

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import keras_tuner as kt
import shap
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# Import the RandomSearch tuner from keras_tuner
from keras_tuner.tuners import RandomSearch

# Read the input dataset
df = pd.read_csv('/content/drive/MyDrive/ML/Nikhil_ML3.csv')
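A note on the input data (added for context): the Drive CSV is assumed to contain the columns 'Thickness', 'doping', and 'Defect' plus a PCE target column, which the lines below rely on. Readers without access to that file could substitute a hypothetical stand-in with the same layout, for example:

# Hypothetical stand-in data (illustrative values only, not the simulation results used in the paper)
df = pd.DataFrame({
    'Thickness': np.random.uniform(0.1, 1.0, 200),
    'doping': np.random.uniform(1e14, 1e18, 200),
    'Defect': np.random.uniform(1e13, 1e16, 200),
    'PCE (%)': np.random.uniform(5, 20, 200),
})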
# Split data into features and target variable
X = df[['Thickness', 'doping', 'Defect']]
y = df['PCE (%)']
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Creating the RandomForestRegressor model
rf_model = RandomForestRegressor()
# Defining the hyperparameters to search
params = {
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200],
    'min_samples_leaf': [1, 3, 5]
}
# Performing GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(estimator=rf_model, param_grid=params, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
# Getting the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)
# Creating the final RandomForestRegressor model with the best hyperparameters
final_rf_model = RandomForestRegressor(**best_params)
# Training the model
final_rf_model.fit(X_train, y_train)
# Predicting the target variable on the test set
y_test_pred = final_rf_model.predict(X_test)
# Calculating evaluation metrics (MSE, R-squared, cross-validation score)
mse = mean_squared_error(y_test, y_test_pred)
r2 = r2_score(y_test, y_test_pred)
cv_score = cross_val_score(grid_search.best_estimator_, X, y, cv=50)
print('Mean Squared Error:', mse)
print('R-squared:', r2)
print('Cross-Validation Score:', cv_score.mean())
print('Best Parameters:', grid_search.best_params_)
print('Best Score:', grid_search.best_score_)

Output
Best Hyperparameters: {'max_depth': 7, 'min_samples_leaf': 1, 'n_estimators': 100}
Mean Squared Error: 0.05262326227246941
R-squared: 0.9897033432799267
Cross-Validation Score: -5256.733933041297
Best Parameters: {'max_depth': 7, 'min_samples_leaf': 1, 'n_estimators': 100}
Best Score: 0.9914386927829856
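The cross-validation score above is strongly negative even though the held-out R-squared is high. A plausible explanation (an interpretation added here, not stated in the original) is that cross_val_score defaults to the regressor's R-squared score and to unshuffled folds, so cv=50 on an ordered dataset forces many folds to extrapolate, which can drive R-squared far below zero. A shuffled KFold is one way to probe this:

# Added sketch: repeat the cross-validation with shuffled folds
from sklearn.model_selection import KFold
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_score_shuffled = cross_val_score(grid_search.best_estimator_, X, y, cv=cv, scoring='r2')
print('Shuffled 5-fold CV R2:', cv_score_shuffled.mean())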

Explaining the model's predictions using SHAP values


explainer = shap.TreeExplainer(final_rf_model)
shap_values = explainer.shap_values(X)

# Creating a SHAP summary plot to show the impact of all features,
# then saving it (summary_plot must run before savefig, otherwise an empty figure is saved)
shap.summary_plot(shap_values, X, show=False)
plt.savefig('18042023RF_PCE_SHAP values.png', dpi=600)
plt.show()
# Predict on the training set (test-set predictions were computed above)
y_train_pred = final_rf_model.predict(X_train)

# Calculate the x-axis range for the plot


xmin = min(min(y_train_pred), min(y_test_pred))
xmax = max(max(y_train_pred), max(y_test_pred))

# Plotting actual PCE vs predicted PCE for the training set and test set
plt.scatter(y_train, y_train_pred, c='blue', label='Training set')
plt.scatter(y_test, y_test_pred, c='red', label='Test set')
plt.plot(np.linspace(xmin, xmax, 100), np.linspace(xmin, xmax, 100), '--', label='Perfect fit')
plt.xlabel('Actual PCE')
plt.ylabel('Predicted PCE')
plt.title('RF Actual vs Predicted | PCE')
plt.legend()
plt.savefig('10082023RF_PCE_Actual vs predicted.png', dpi=600)
plt.show()
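mean_absolute_error is imported above but not used in the listing; as a small optional check (an addition, not part of the original script), the training- and test-set MAE can be compared to gauge overfitting:

# Added check: compare train/test mean absolute error
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
print('Training MAE:', train_mae)
print('Test MAE:', test_mae)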

XGBoost-

import pandas as pd
import numpy as np
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

# Defining the hyperparameters to search
params = {
    'learning_rate': [0.1, 0.3, 0.5],
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200],
    'min_child_weight': [1, 3, 5]
}

# Read the CSV file
csv_file_path = '/content/drive/MyDrive/ML/Nikhil_ML3.csv'
df = pd.read_csv(csv_file_path)

# Assume the last column is the target variable and the rest are features
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating the XGBRegressor model
xgb_model = xgb.XGBRegressor()

# Performing GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(estimator=xgb_model, param_grid=params, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)
# Creating the final XGBRegressor model with the best hyperparameters
final_xgb_model = xgb.XGBRegressor(**best_params)
# Training the model
final_xgb_model.fit(X_train, y_train)
# Predicting the target variable on the test set
y_test_pred = final_xgb_model.predict(X_test)
# Predict on training and test sets
y_train_pred = final_xgb_model.predict(X_train)
# Calculating evaluation metrics (MSE, R-squared, cross-validation score)
mse = mean_squared_error(y_test, y_test_pred)
r2 = r2_score(y_test, y_test_pred)
cv_score = cross_val_score(grid_search.best_estimator_, X, y, cv=50)
print('Mean Squared Error:', mse)
print('R-squared:', r2)
print('Cross-Validation Score:', cv_score.mean())
print('Best Parameters:', grid_search.best_params_)
print('Best Score:', grid_search.best_score_)

Output
Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 200}
Mean Squared Error: 0.0014407104963233793
R-squared: 0.9997180999281869
Cross-Validation Score: -4718.083636367358
Best Parameters: {'learning_rate': 0.1, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 200}
Best Score: 0.9998299680327589

# Visualizing the first tree in the trained model


fig, ax = plt.subplots(figsize=(30, 20), dpi=600)
xgb.plot_tree(final_xgb_model, num_trees=0, ax=ax)
plt.savefig('/content/drive/MyDrive/ML/Nikhil_ML3.csv_PCE_tree in the trained model.png', dpi=600)
plt.show()
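As a complementary, added view of the fitted booster (not part of the original listing), XGBoost's built-in importance plot can summarize how much each feature contributes to the splits:

# Added sketch: gain-based feature importance for the tuned model
fig, ax = plt.subplots(figsize=(6, 4))
xgb.plot_importance(final_xgb_model, importance_type='gain', ax=ax)
plt.tight_layout()
plt.show()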

import shap
explainer = shap.TreeExplainer(final_xgb_model)
shap_values = explainer.shap_values(X)
# Creating a SHAP summary plot to show the impact of all features,
# then saving it (summary_plot must run before savefig, otherwise an empty figure is saved)
shap.summary_plot(shap_values, X, show=False)
plt.savefig('20052023XGB_AgBiSCl2NK_PCE_SHAP values.png', dpi=600)
plt.show()
# Calculate the x-axis range for the plot
xmin = min(min(y_train_pred), min(y_test_pred))
xmax = max(max(y_train_pred), max(y_test_pred))
# Plotting actual PCE vs predicted PCE for the training set and test set
plt.scatter(y_train, y_train_pred, c='blue', label='Training set')
plt.scatter(y_test, y_test_pred, c='red', label='Test set')
plt.plot(np.linspace(xmin, xmax, 100), np.linspace(xmin, xmax, 100), '--', label='Perfect fit')
plt.xlabel('Actual PCE')
plt.ylabel('Predicted PCE')
plt.title('Actual vs Predicted | PCE | AgBiSCl2NK')
plt.legend()
plt.savefig('20052023XGB_AgBiSCl2NK_PCE_Actual vs predicted.png', dpi=600)
plt.show()
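If the tuned model needs to be reused outside the notebook (an added suggestion, not part of the original listing; the file name below is hypothetical), XGBoost's native save/load format is one option:

# Added sketch: persist and reload the tuned model (hypothetical file name)
final_xgb_model.save_model('xgb_pce_model.json')
reloaded = xgb.XGBRegressor()
reloaded.load_model('xgb_pce_model.json')
print('Reloaded-model R2 on test set:', r2_score(y_test, reloaded.predict(X_test)))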
