Electrical Machine Learning Tool
Electrical Machine Learning Tool
In [2]:
In [4]:
# Print the frame's schema: row count, per-column dtype and non-null counts
# (the non-null counts reveal which columns contain missing values).
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 5000 non-null object
1 Hour 5000 non-null int64
2 Number_of_Appliances 4700 non-null float64
3 Usage_Duration 4700 non-null float64
4 Peak_Usage 4800 non-null float64
5 Electricity_Consumption 5000 non-null float64
dtypes: float64(4), int64(1), object(1)
memory usage: 234.5+ KB
In [5]:
# Preview the first five rows to sanity-check the column layout and values.
df.head()
Out [5]:
Date Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
2023-
0 0 5.0 1.118288 0.0 4.935174
01-01
2023-
1 1 4.0 1.737984 0.0 7.495992
01-01
2023-
2 2 4.0 3.350142 0.0 11.460053
01-01
2023-
3 3 5.0 4.893616 0.0 28.588596
01-01
2023-
4 4 5.0 1.030203 0.0 4.929359
01-01
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
Out [6]:
Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
count 5000.000000 4700.000000 4700.000000 4800.000000 5000.000000
mean 11.487200 5.020426 2.750078 0.125000 15.082294
std 6.925332 2.220455 1.295222 0.330753 11.412605
min 0.000000 0.000000 0.500025 0.000000 0.000000
25% 5.000000 3.000000 1.613859 0.000000 6.768928
50% 11.000000 5.000000 2.751787 0.000000 12.560582
75% 17.000000 6.000000 3.854879 0.000000 20.362971
max 23.000000 15.000000 4.999052 1.000000 121.078379
In [7]:
# Data Preprocessing
# -------------------
# Parse the 'Date' column into pandas datetimes so that any later
# time-series analysis can work with real dates instead of strings.
df['Date'] = df['Date'].pipe(pd.to_datetime)
In [11]:
# Handle missing values by filling each affected column with that column's median.
#
# The filled Series is assigned back to the frame instead of calling
# `df[column].fillna(..., inplace=True)`: the in-place form mutates an
# intermediate Series (chained assignment), which raises a FutureWarning on
# pandas 2.x and leaves `df` unchanged once Copy-on-Write is enabled.
# Plain assignment is also idempotent, so re-running this cell is safe.
#
# NOTE(review): describe() shows Peak_Usage with min 0, max 1 and median 0, so
# it looks like a 0/1 flag whose missing entries become 0.0 here — confirm
# that median imputation is what is wanted for that column.
for column in ['Number_of_Appliances', 'Usage_Duration', 'Peak_Usage']:
    if df[column].isnull().any():
        df[column] = df[column].fillna(df[column].median())
df.head()
Out [11]:
Date Hour Number_of_Appliances Usage_Duration Peak_Usage Electricity_Consumption
2023-
0 0 5.0 1.118288 0.0 4.935174
01-01
2023-
1 1 4.0 1.737984 0.0 7.495992
01-01
2023-
2 2 4.0 3.350142 0.0 11.460053
01-01
2023-
3 3 5.0 4.893616 0.0 28.588596
01-01
2023-
4 4 5.0 1.030203 0.0 4.929359
01-01
In [12]:
In [13]:
# Modeling
# --------
# Define features and target variable
X = df[['Hour', 'Number_of_Appliances', 'Usage_Duration', 'Peak_Usage', 'DayOf
y = df['Electricity_Consumption']
In [14]:
In [15]:
In [16]:
LinearRegression()
In [17]:
In [21]:
# Evaluation
# ----------
# Compare the predictions (y_pred) against the true targets (y_test) using
# two regression metrics: Mean Squared Error and the R^2 score.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
In [22]:
# The MSE provides a measure of how well the model predictions approximate the actual values (lower is better).
# The R-squared score is a statistical measure of how close the data are to the fitted regression line (closer to 1 is better).