0% found this document useful (0 votes)
5 views

Electrical Machine Learning Tool

Machine learning data for Electrical engine

Uploaded by

Martins Richmond
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views

Electrical Machine Learning Tool

Machine learning data for Electrical engine

Uploaded by

Martins Richmond
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

In [20]:

# Importing necessary libraries


import pandas as pd # Used for data manipulation and handling
import numpy as np # Useful for numerical operations
from sklearn.model_selection import train_test_split # For splitting the data
from sklearn.linear_model import LinearRegression # The ML model we will use
from sklearn.metrics import mean_squared_error, r2_score # For evaluating the model

In [2]:

# Read the electricity-consumption records from CSV into a DataFrame.
# The path is relative, so the file must be in the notebook's working directory.
DATASET_PATH = 'Electricity_Consumption_Dataset.csv'
df = pd.read_csv(DATASET_PATH)

In [4]:
# Summarise the frame: column names, non-null counts and dtypes.
# Used here to spot which columns contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 5000 non-null object
1 Hour 5000 non-null int64
2 Number_of_Appliances 4700 non-null float64
3 Usage_Duration 4700 non-null float64
4 Peak_Usage 4800 non-null float64
5 Electricity_Consumption 5000 non-null float64
dtypes: float64(4), int64(1), object(1)
memory usage: 234.5+ KB

In [5]:
# Preview the first rows of the frame as a quick sanity check of the loaded data.
df.head()

Out [5]:
Date Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
2023-
0 0 5.0 1.118288 0.0 4.935174
01-01
2023-
1 1 4.0 1.737984 0.0 7.495992
01-01
2023-
2 2 4.0 3.350142 0.0 11.460053
01-01
2023-
3 3 5.0 4.893616 0.0 28.588596
01-01
2023-
4 4 5.0 1.030203 0.0 4.929359
01-01

In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Out [6]:
Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
count 5000.000000 4700.000000 4700.000000 4800.000000 5000.000000
mean 11.487200 5.020426 2.750078 0.125000 15.082294
std 6.925332 2.220455 1.295222 0.330753 11.412605
min 0.000000 0.000000 0.500025 0.000000 0.000000
25% 5.000000 3.000000 1.613859 0.000000 6.768928
50% 11.000000 5.000000 2.751787 0.000000 12.560582
75% 17.000000 6.000000 3.854879 0.000000 20.362971
max 23.000000 15.000000 4.999052 1.000000 121.078379
In [7]:

# Data Preprocessing
# -------------------
# Parse the object-typed 'Date' column into pandas datetimes so that
# datetime accessors (.dt) can be used on it in later cells.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [11]:

# Handle missing values by filling them with the median of each column.
#
# The filled Series is assigned back to the frame instead of calling
# `df[column].fillna(..., inplace=True)`: that form mutates a temporary
# selected from `df` (chained assignment), which raises a FutureWarning on
# pandas 2.x and leaves `df` unchanged once Copy-on-Write is enabled.
#
# NOTE(review): 'Peak_Usage' looks like a 0/1 flag (min 0, max 1 in
# describe()); median imputation will set its missing entries to 0 —
# confirm that this is the intended treatment.
for column in ['Number_of_Appliances', 'Usage_Duration', 'Peak_Usage']:
    if df[column].isnull().any():
        df[column] = df[column].fillna(df[column].median())

df.head()

Out [11]:
Date Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
2023-
0 0 5.0 1.118288 0.0 4.935174
01-01
2023-
1 1 4.0 1.737984 0.0 7.495992
01-01
2023-
2 2 4.0 3.350142 0.0 11.460053
01-01
2023-
3 3 5.0 4.893616 0.0 28.588596
01-01
2023-
4 4 5.0 1.030203 0.0 4.929359
01-01

In [12]:

# Feature Engineering
# -------------------
# Derive an extra calendar feature that might help model performance:
# the day of the week of each record (pandas encodes Monday as 0, Sunday as 6).
# Requires 'Date' to already be a datetime column.
day_of_week = df['Date'].dt.dayofweek
df['DayOfWeek'] = day_of_week

In [13]:

# Modeling
# --------
# Define the feature matrix X and the target vector y.
# NOTE(review): the column list was clipped after 'DayOf in the exported
# notebook; it is completed here with 'DayOfWeek', the column created in the
# feature-engineering cell — confirm against the original notebook.
feature_columns = ['Hour', 'Number_of_Appliances', 'Usage_Duration', 'Peak_Usage', 'DayOfWeek']
X = df[feature_columns]
y = df['Electricity_Consumption']

In [14]:

# Split the data into train and test sets (test_size=0.2 holds out 20% for testing).
# NOTE(review): this call was clipped after "rando" in the exported notebook.
# The argument is restored as `random_state`, but the original seed value is
# not recoverable from the export; 42 is a placeholder — confirm, since the
# reported MSE / R^2 depend on the exact split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:

# Initialize the Linear Regression model.
# No arguments are passed, so the estimator's default settings are used.


model = LinearRegression()

In [16]:

# Train the model on the training split only; the test split is kept
# aside so it can be used for evaluation afterwards.


model.fit(X_train, y_train)
Out [16]: ▾ LinearRegression

LinearRegression()

In [17]:

# Predict on the test set.
# y_pred holds the model's predictions for the rows in X_test and is
# compared against y_test in the evaluation cell.


y_pred = model.predict(X_test)

In [21]:

# Evaluation
# ----------
# Score the test-set predictions with two metrics:
# R^2 (coefficient of determination) and Mean Squared Error.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [22]:

# Report both evaluation metrics computed on the test set.
print(f'Mean Squared Error (MSE): {mse}')


print(f'R-squared Score: {r2}')

# The MSE provides a measure of how well the model predictions approximate the actual values (lower is better).
# The R-squared score is a statistical measure of how close the data are to the fitted regression line (1.0 is a perfect fit).

Mean Squared Error (MSE): 34.27105705807505


R-squared Score: 0.7447340948875754

You might also like