To Improve The Performance of Models Predicting Battery Temperature
We start by importing necessary libraries, loading the data, and performing initial preprocessing:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, learning_curve, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
import xgboost as xgb
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
# Load the battery cycling data and keep the measured signals used as model inputs.
battery_data = pd.read_csv('00041.csv')
data = battery_data[['Time', 'Voltage_measured', 'Current_measured', 'Temperature_measured',
                     'Current_load', 'Voltage_load']].copy()
data['Time'] = pd.to_datetime(data['Time'])
data.set_index('Time', inplace=True)
We split the data into training and test sets and normalize it:
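The exact split and scaling code is not shown in the original listing; a minimal sketch, assuming a chronological 80/20 split and per-feature standardization fitted on the training portion only:
split_idx = int(len(data) * 0.8)
train, test = data.iloc[:split_idx], data.iloc[split_idx:]

# Fit the scaler on the training data only to avoid leaking test statistics.
scaler = StandardScaler()
train_scaled = pd.DataFrame(scaler.fit_transform(train), columns=train.columns, index=train.index)
test_scaled = pd.DataFrame(scaler.transform(test), columns=test.columns, index=test.index)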
Create Sequences:
For LSTM and GRU models, we create sequences of data points:
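The create_sequences helper used below is not shown in the original listing; a minimal sketch, assuming the measured temperature of the step immediately after each window is the prediction target:
def create_sequences(data, seq_length, target_col='Temperature_measured'):
    # Slide a window of seq_length rows over the data; the target is the
    # temperature of the row immediately after each window.
    values = data.values
    target_idx = data.columns.get_loc(target_col)
    X, y = [], []
    for i in range(len(values) - seq_length):
        X.append(values[i:i + seq_length])
        y.append(values[i + seq_length, target_idx])
    return np.array(X), np.array(y)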
SEQ_LENGTH = 10
X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

# Flat (non-sequence) feature matrices for XGBoost, KNN, and Random Forest;
# row i is paired with the target SEQ_LENGTH steps ahead, reusing the targets
# produced by create_sequences.
X_train_flat = train_scaled.iloc[:-SEQ_LENGTH].values
y_train_flat = y_train
X_test_flat = test_scaled.iloc[:-SEQ_LENGTH].values
y_test_flat = y_test
Data Augmentation:
We use bootstrapping to augment the training data:
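The exact augmentation is not shown in the original listing; a minimal bootstrapping sketch that resamples the flattened training rows with replacement and appends them to the originals (the recurrent models are assumed to train on the unaugmented sequences):
# Bootstrap: draw len(X_train_flat) rows with replacement and stack them onto
# the original training set, doubling its size.
n_samples = len(X_train_flat)
boot_idx = np.random.choice(n_samples, size=n_samples, replace=True)
X_train_aug = np.concatenate([X_train_flat, X_train_flat[boot_idx]])
y_train_aug = np.concatenate([y_train_flat, y_train_flat[boot_idx]])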
Define Models:
We define the deep LSTM and GRU networks and set up hyperparameter tuning for XGBoost:
def create_lstm_model(input_shape):
    model = Sequential([
        LSTM(100, activation='relu', return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(50, activation='relu'),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def create_gru_model(input_shape):
    model = Sequential([
        GRU(100, activation='relu', return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        GRU(50, activation='relu'),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model
# Input shape for the recurrent models: (timesteps, number of features).
input_shape = (SEQ_LENGTH, X_train.shape[2])
model_lstm = create_lstm_model(input_shape)
model_gru = create_gru_model(input_shape)
param_grid = {
'n_estimators': [50, 100, 200],
'max_depth': [3, 5, 7],
'learning_rate': [0.01, 0.05, 0.1],
'subsample': [0.8, 0.9, 1.0]
}
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3,
scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
grid_search.fit(X_train_aug, y_train_aug)
best_xgb_model = grid_search.best_estimator_
model_knn = KNeighborsRegressor(n_neighbors=5)
knn_scores = cross_val_score(model_knn, X_train_aug, y_train_aug, cv=5,
scoring='neg_mean_squared_error')
print("KNN Cross-Validation Scores:", knn_scores)
model_rf = RandomForestRegressor(n_estimators=100)
rf_scores = cross_val_score(model_rf, X_train_aug, y_train_aug, cv=5,
scoring='neg_mean_squared_error')
print("Random Forest Cross-Validation Scores:", rf_scores)
Train Models:
We train the LSTM, GRU, XGBoost, KNN, and Random Forest models:
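The recurrent models are fitted on the sequence data; a minimal sketch (the epoch count, batch size, and validation split are assumptions), with the fitted histories reused for the learning-curve plots later:
history_lstm = model_lstm.fit(X_train, y_train, epochs=50, batch_size=32,
                              validation_split=0.2, verbose=0)
history_gru = model_gru.fit(X_train, y_train, epochs=50, batch_size=32,
                            validation_split=0.2, verbose=0)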
model_knn.fit(X_train_aug, y_train_aug)
model_rf.fit(X_train_aug, y_train_aug)
Make Predictions:
We make predictions using the trained models:
y_pred_lstm = model_lstm.predict(X_test)
y_pred_gru = model_gru.predict(X_test)
y_pred_xgb = best_xgb_model.predict(X_test_flat)
y_pred_knn = model_knn.predict(X_test_flat)
y_pred_rf = model_rf.predict(X_test_flat)
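The combined prediction is not defined in the original listing; a minimal sketch, assuming a simple average of the five model outputs:
# All predictions cover the same test rows, so an element-wise mean is valid.
y_pred_combined = np.mean([y_pred_lstm.flatten(), y_pred_gru.flatten(),
                           y_pred_xgb, y_pred_knn, y_pred_rf], axis=0)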
Visualize Results:
We prepare the results for visualization and plot them:
results = pd.DataFrame({
'Actual': y_test,
'LSTM': y_pred_lstm.flatten(),
'GRU': y_pred_gru.flatten(),
'XGBoost': y_pred_xgb,
'KNN': y_pred_knn,
'RF': y_pred_rf,
'Combined': y_pred_combined
}, index=test.index[SEQ_LENGTH:])
plt.figure(figsize=(14, 8))
sns.lineplot(data=results, markers=True)
plt.title('Battery Temperature Prediction')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.legend()
plt.show()
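The plot_learning_curve helper called next is not shown in the original listing; a minimal sketch, assuming it plots the Keras training history of each recurrent model:
def plot_learning_curve(history, model_name):
    # Training vs. validation loss per epoch, as recorded by model.fit.
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Learning Curve - {model_name}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.show()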
plot_learning_curve(history_lstm, 'LSTM')
plot_learning_curve(history_gru, 'GRU')
Evaluate Metrics:
We calculate the performance metrics for each model:
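The metric tuples referenced below are not shown in the original listing; a minimal sketch using a hypothetical evaluate helper that returns (MAE, MSE, RMSE, R2) for each model:
def evaluate(y_true, y_pred):
    # RMSE is derived from MSE; all metrics come from sklearn.metrics.
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    return mae, mse, np.sqrt(mse), r2_score(y_true, y_pred)

metrics_lstm = evaluate(y_test, y_pred_lstm.flatten())
metrics_gru = evaluate(y_test, y_pred_gru.flatten())
metrics_xgb = evaluate(y_test, y_pred_xgb)
metrics_knn = evaluate(y_test, y_pred_knn)
metrics_rf = evaluate(y_test, y_pred_rf)
metrics_combined = evaluate(y_test, y_pred_combined)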
comparison_table = pd.DataFrame({
'Model': ['LSTM', 'GRU', 'XGBoost', 'KNN', 'Random Forest', 'Combined'],
'MAE': [metrics_lstm[0], metrics_gru[0], metrics_xgb[0], metrics_knn[0], metrics_rf[0],
metrics_combined[0]],
'MSE': [metrics_lstm[1], metrics_gru[1], metrics_xgb[1], metrics_knn[1], metrics_rf[1],
metrics_combined[1]],
'RMSE': [metrics_lstm[2], metrics_gru[2], metrics_xgb[2], metrics_knn[2], metrics_rf[2],
metrics_combined[2]],
'R2': [metrics_lstm[3], metrics_gru[3], metrics_xgb[3], metrics_knn[3], metrics_rf[3],
metrics_combined[3]]
})
print(comparison_table)
We also examine how the classical models behave as the training set grows, using internal (cross-validated) and external (held-out test) validation curves:
# Internal validation: scikit-learn's learning_curve cross-validates the model
# on progressively larger slices of the augmented training data (the model and
# model_name are assumed to be supplied by the caller).
train_sizes, train_scores_cv, val_scores_cv = learning_curve(
    model, X_train_aug, y_train_aug, cv=5, scoring='neg_mean_squared_error')
train_scores_mean = -train_scores_cv.mean(axis=1)
val_scores_mean = -val_scores_cv.mean(axis=1)
plt.figure(figsize=(14, 8))
plt.plot(train_sizes, train_scores_mean, 'o-', label='Training Error')
plt.plot(train_sizes, val_scores_mean, 'o-', label='Validation Error')
plt.title(f'Internal Validation Curve - {model_name}')
plt.xlabel('Training Set Size')
plt.ylabel('Error (MSE)')
plt.legend()
plt.show()
# External validation: retrain the model on growing subsets of the training
# data and score each fit on the held-out test set (the subset fractions are
# an assumption; model and model_name are supplied by the caller).
train_sizes = (np.linspace(0.1, 1.0, 10) * len(X_train_flat)).astype(int)
train_scores, val_scores = [], []
for size in train_sizes:
    X_train_part, y_train_part = X_train_flat[:size], y_train_flat[:size]
    model.fit(X_train_part, y_train_part)
    train_pred = model.predict(X_train_part)
    val_pred = model.predict(X_test_flat)
    train_scores.append(mean_squared_error(y_train_part, train_pred))
    val_scores.append(mean_squared_error(y_test, val_pred))
plt.figure(figsize=(14, 8))
plt.plot(train_sizes, train_scores, 'o-', label='Training Error')
plt.plot(train_sizes, val_scores, 'o-', label='Validation Error')
plt.title(f'External Validation Curve - {model_name}')
plt.xlabel('Training Set Size')
plt.ylabel('Error (MSE)')
plt.legend()
plt.show()
Results Summary:
After implementing the above steps, we achieved the following performance metrics:
The GRU model outperformed the others, with the lowest MAE, MSE, and RMSE and the only positive R2 score.
The LSTM model was the next best on the error metrics, although its R2 remained negative.
The combined (averaged) predictions showed moderate performance.
The XGBoost, KNN, and Random Forest models did not perform as well as the recurrent models.
Model           MAE        MSE        RMSE       R2
LSTM            1.109204   1.819550   1.348907   -2.866460
GRU             0.303446   0.111909   0.334528    0.762198
XGBoost         1.990743   4.434215   2.105757   -8.422503
KNN             2.481018   6.893967   2.625637   -13.649365
Random Forest   1.832857   3.829704   1.956963   -7.137946
Combined        1.543454   2.742196   1.655958   -4.827040