Medical Insurance Analysis
Medical Insurance Analysis
/kaggle/input/insurance/insurance.csv
[2]: df = pd.read_csv("/kaggle/input/insurance/insurance.csv")
df.head()
[3]: df.isnull().sum()
[3]: age 0
sex 0
bmi 0
children 0
smoker 0
region 0
charges 0
dtype: int64
1
# Replace each categorical column with integer codes. LabelEncoder numbers the
# distinct labels in sorted order, and after this cell `le` stays fitted on region.
# sex
df_aug["sex"] = le.fit_transform(df_aug["sex"])
# smoker or not
df_aug["smoker"] = le.fit_transform(df_aug["smoker"])
# region
df_aug["region"] = le.fit_transform(df_aug["region"])
[5]: df_aug.corr()['charges'].sort_values()
square=True, ax=ax)
2
[ ]:
[7]: f= pl.figure(figsize=(12,5))
ax=f.add_subplot(121)
sns.distplot(df_aug[(df_aug.smoker == 1)]["charges"],color='c',ax=ax)
ax.set_title('Distribution of charges for smokers')
ax=f.add_subplot(122)
sns.distplot(df_aug[(df_aug.smoker == 0)]['charges'],color='b',ax=ax)
ax.set_title('Distribution of charges for non-smokers')
3
[8]: sns.catplot(x="smoker", kind="count",hue = 'sex', palette="pink", data=df)
4
[9]: sns.catplot(x="sex", y="charges", hue="smoker",
kind="violin", data=df, palette = 'magma')
[10]: pl.figure(figsize=(12,5))
pl.title("Box plot for charges of women")
sns.boxplot(y="smoker", x="charges", data = df_aug[(df_aug.sex == 1)] ,␣
↪orient="h", palette = 'magma')
5
[11]: pl.figure(figsize=(12,5))
pl.title("Box plot for charges of men")
sns.boxplot(y="smoker", x="charges", data = df_aug[(df_aug.sex == 0)] ,␣
↪orient="h", palette = 'rainbow')
[12]: pl.figure(figsize=(12,5))
pl.title("Distribution of age")
ax = sns.distplot(df_aug["age"], color = 'g')
6
[13]: g = sns.jointplot(x="age", y="charges", data = df_aug[(df_aug.smoker ==␣
↪0)],kind="kde", fill=True, cmap= "flare")
7
[14]: g = sns.jointplot(x="age", y="charges", data = df_aug[(df_aug.smoker ==␣
↪1)],kind="kde", fill=True, cmap="magma")
8
[15]: sns.lmplot(x="age", y="charges", hue="smoker", data=df_aug, palette =␣
↪'inferno_r')
9
[16]: pl.figure(figsize=(12,5))
pl.title("Distribution of bmi")
ax = sns.distplot(df["bmi"], color = 'm')
10
[17]: pl.figure(figsize=(12,5))
pl.title("Distribution of charges for patients with BMI greater than 30")
ax = sns.distplot(df[(df.bmi >= 30)]['charges'], color = 'c')
[18]: pl.figure(figsize=(12,5))
pl.title("Distribution of charges for patients with BMI less than 30")
ax = sns.distplot(df[(df.bmi < 30)]['charges'], color = 'b')
11
[19]: g = sns.jointplot(x="bmi", y="charges", data = df,kind="kde", fill = True, cmap␣
↪= 'viridis')
[20]: pl.figure(figsize=(10,6))
ax = sns.
↪scatterplot(x='bmi',y='charges',data=df_aug,palette='magma',hue='smoker')
12
# Scatter of charges against BMI with a separate linear fit per smoker code.
sns.lmplot(
    data=df_aug,
    x="bmi",
    y="charges",
    hue="smoker",
    palette="magma",
)
13
[21]: sns.catplot(x="children", kind="count", palette="ch:.25", data=df_aug)
14
[22]: sns.catplot(x="smoker", kind="count", palette="rainbow",hue = "sex",
data=df[(df.children > 0)])
ax.set_title('Smokers and non-smokers who have childrens')
15
[23]: from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.ensemble import RandomForestRegressor
# NOTE(review): lr, x_train, x_test and y_test are not defined in the visible lines.
# Presumably lr is a fitted LinearRegression and the arrays come from
# train_test_split (both are imported just above) -- confirm against the full notebook.
y_train_pred = lr.predict(x_train)
y_test_pred = lr.predict(x_test)
# For scikit-learn regressors, score() reports the R^2 coefficient of determination.
print(lr.score(x_test,y_test))
0.7962732059725786
16
[25]: X = df_aug.drop(['charges','region'], axis = 1)  # features: every column except charges and region
Y = df_aug.charges  # target: the charges column
# NOTE(review): X_train, Y_train, X_test and Y_test are not defined in the visible lines.
# Presumably X was transformed (PolynomialFeatures is imported above) and split with
# train_test_split between here and the fit -- confirm against the full notebook.
plr = LinearRegression().fit(X_train,Y_train)
Y_train_pred = plr.predict(X_train)
Y_test_pred = plr.predict(X_test)
# For scikit-learn regressors, score() reports the R^2 coefficient of determination.
print(plr.score(X_test,Y_test))
0.8849197344147234
[ ]:
[ ]:
17