Project 16: Calories Burnt Prediction (notebook exported via vertopal.com)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics
User_ID Calories
0 14733363 231.0
1 14861698 66.0
2 11179863 26.0
3 16180408 71.0
4 17771927 35.0
# Load the exercise records from the CSV file into a DataFrame.
exercise_data = pd.read_csv('/content/exercise.csv')
# Preview the first rows of the exercise table.
exercise_data.head()
# NOTE(review): `calories_data` is not defined in the visible part of this
# export; presumably it is built from the exercise table plus the calories
# table in a cell that is not shown here -- confirm before running.
calories_data.head()
    User_ID  Gender  Age  Height  ...  Duration  Heart_Rate  Body_Temp  Calories
0  14733363    male   68   190.0  ...      29.0       105.0       40.8     231.0
1  14861698  female   20   166.0  ...      14.0        94.0       40.3      66.0
2  11179863    male   69   179.0  ...       5.0        88.0       38.7      26.0
3  16180408  female   34   179.0  ...      13.0       100.0       40.5      71.0
4  17771927  female   27   154.0  ...      10.0        81.0       39.8      35.0
[5 rows x 9 columns]
(15000, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 User_ID 15000 non-null int64
1 Gender 15000 non-null object
2 Age 15000 non-null int64
3 Height 15000 non-null float64
4 Weight 15000 non-null float64
5 Duration 15000 non-null float64
6 Heart_Rate 15000 non-null float64
7 Body_Temp 15000 non-null float64
8 Calories 15000 non-null float64
dtypes: float64(6), int64(2), object(1)
memory usage: 1.0+ MB
User_ID 0
Gender 0
Age 0
Height 0
Weight 0
Duration 0
Heart_Rate 0
Body_Temp 0
Calories 0
dtype: int64
Data Analysis
[8 rows x 8 columns]
Data Visualization
# Apply seaborn's default theme to the plots drawn after this point.
sns.set()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43:
FutureWarning: Pass the following variable as a keyword arg: x. From
version 0.12, the only valid positional argument will be `data`, and
passing other arguments without an explicit keyword will result in an
error or misinterpretation.
FutureWarning
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbbd756110>
# Plot the distribution of the "Age" column as a histogram with a KDE curve.
# `sns.distplot` is deprecated (see the FutureWarning it emits); the
# axes-level replacement is `histplot`. `stat='density'` keeps the y-axis on
# the density scale that `distplot` used, so the KDE overlay lines up.
sns.histplot(calories_data['Age'], kde=True, stat='density')
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557:
FutureWarning: `distplot` is a deprecated function and will be removed
in a future version. Please adapt your code to use either `displot` (a
figure-level function with similar flexibility) or `histplot` (an
axes-level function for histograms).
warnings.warn(msg, FutureWarning)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbbd200550>
# Plot the distribution of the "Height" column as a histogram with a KDE curve.
# `sns.distplot` is deprecated (see the FutureWarning it emits); the
# axes-level replacement is `histplot`. `stat='density'` keeps the y-axis on
# the density scale that `distplot` used, so the KDE overlay lines up.
sns.histplot(calories_data['Height'], kde=True, stat='density')
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557:
FutureWarning: `distplot` is a deprecated function and will be removed
in a future version. Please adapt your code to use either `displot` (a
figure-level function with similar flexibility) or `histplot` (an
axes-level function for histograms).
warnings.warn(msg, FutureWarning)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbb1ed3d10>
# Plot the distribution of the "Weight" column as a histogram with a KDE curve.
# `sns.distplot` is deprecated (see the FutureWarning it emits); the
# axes-level replacement is `histplot`. `stat='density'` keeps the y-axis on
# the density scale that `distplot` used, so the KDE overlay lines up.
sns.histplot(calories_data['Weight'], kde=True, stat='density')
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557:
FutureWarning: `distplot` is a deprecated function and will be removed
in a future version. Please adapt your code to use either `displot` (a
figure-level function with similar flexibility) or `histplot` (an
axes-level function for histograms).
warnings.warn(msg, FutureWarning)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbb1e2c190>
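Finding the Correlation in the dataset
1. Positive Correlation: the two variables tend to increase or decrease together
2. Negative Correlation: one variable tends to decrease as the other increases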
# Compute pairwise correlations over the numeric columns only. "Gender" is
# still an object (string) column at this point -- it is only encoded to
# integers further down -- and DataFrame.corr() raises on non-numeric columns
# in pandas >= 2.0. Older pandas silently dropped such columns, so selecting
# the numeric columns explicitly gives the same matrix on every version.
correlation = calories_data.select_dtypes(include='number').corr()

# Draw the correlation matrix as an annotated heatmap; values are shown with
# one decimal place.
plt.figure(figsize=(10, 10))
sns.heatmap(correlation, cbar=True, square=True, fmt='.1f',
            annot=True, annot_kws={'size': 8}, cmap='Blues')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbd5c75650>
Converting the text data to numerical values
# Encode the text "Gender" column as integers: male -> 0, female -> 1.
# An explicit element-wise mapping is used instead of DataFrame.replace so the
# result does not rely on pandas silently downcasting an object column to
# integers (that downcasting is deprecated in recent pandas releases).
# Any value other than 'male'/'female' would become NaN rather than being
# left as text.
calories_data['Gender'] = calories_data['Gender'].map({'male': 0, 'female': 1})
calories_data.head()
[5 rows x 9 columns]
# Separate the model inputs from the regression target.
# Features: every column except the "User_ID" and "Calories" columns.
X = calories_data.drop(columns=['User_ID', 'Calories'])
# Target: the "Calories" column.
Y = calories_data['Calories']

# Show both pieces, one after the other.
print(X, Y, sep='\n')
0 231.0
1 66.0
2 26.0
3 71.0
4 35.0
...
14995 45.0
14996 23.0
14997 75.0
14998 11.0
14999 98.0
Name: Calories, Length: 15000, dtype: float64
Model Training
XGBoost Regressor
Evaluation
# Run the fitted model on the test features and print the predicted values.
# NOTE(review): `model` and `X_test` are created in cells that are not part of
# the visible export -- presumably an XGBRegressor fitted on the training
# split produced by train_test_split; confirm.
test_data_prediction = model.predict(X_test)
print(test_data_prediction)