# FDS Assignment 3
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as s
# Load the dataset; every later step in this section works on this frame.
df = pd.read_csv('data.csv')
#print(df)

#1 -- basic inspection: summary statistics, dimensions and the first rows.
print("\n")
print("Statistical Data : \n", df.describe())
print("\n")
print("Shape : ", df.shape)
print("\n")
print("Top 3 Head : \n", df.head(3))
print("\n")

#2 -- missing-value handling.
# Boolean mask with True wherever a cell is missing.
print(df.isnull())
# dropna() returns a new frame without the rows that contain missing values;
# df itself is left untouched here.
print("\nDrop rows value having 0 \n", df.dropna())
# Kept disabled: fillna(..., inplace=True) returns None, so printing its
# result would only show "None".
#print("\nEvery missing value replace with 0 \n",(df.fillna("NaN", inplace=True)))

# Replace missing ages with the column mean. The result is assigned back to
# the column instead of calling fillna(inplace=True) on the selection, because
# that chained form may act on a temporary copy and leave df unchanged.
mean = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean)
print("\n\n", df)

# Same treatment for missing salaries.
mean = df['Salary'].mean()
df['Salary'] = df['Salary'].fillna(mean)
print("\n\n", df)
#3 -- encoding of categorical columns.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

#print("\nOne Hot :\n")
# One-hot encode 'Purchased'. The double brackets pass a one-column frame,
# since the encoder expects 2-D input; toarray() turns the encoder's sparse
# output into a dense array for display. This runs before 'Purchased' is
# label-encoded below, so enc_df is built from the original labels.
enc = OneHotEncoder(handle_unknown='ignore')
enc_df = pd.DataFrame(enc.fit_transform(df[['Purchased']]).toarray())
print(" One Hot : \n ", enc_df)

# Replace each country name with an integer code (overwrites the column).
labelencoder = LabelEncoder()
df['Country'] = labelencoder.fit_transform(df['Country'])
print("\n\n", df)

# A fresh encoder for 'Purchased', so its fitted classes stay separate from
# the ones learned for 'Country'.
labelencoder = LabelEncoder()
df['Purchased'] = labelencoder.fit_transform(df['Purchased'])
print("\n\n", df)
# SET B)
import pandas as pd
import numpy as np
from sklearn import preprocessing
import scipy.stats as s
#creating dataframe
# Small in-memory sample: three numeric columns with four rows each.
d = {
    'c01': [1, 3, 7, 4],
    'c02': [12, 2, 7, 1],
    'c03': [22, 34, -11, 9],
}
df2 = pd.DataFrame(d)
print("\n Original data values")
print(df2)

#Rescaling data
print("\n\ndata scaling betweeen 0 to 1")
# Min-max scaling maps each column independently onto the range [0, 1].
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
# fit_transform returns a NumPy array, not a DataFrame.
data_scaled = data_scaler.fit_transform(df2)
print("\nMin Max Scaled data")
print(data_scaled.round(2))
#Standardize data
print("\nStandardize data")
x_train = np.array([
    [1., -1., 2.],
    [2., 0., 0.],
    [0., 1., -1.],
])
print("\nOriginal Data \n", x_train)
# tmean() without limits and ndarray.std() without an axis both reduce over
# every element of the array, so these are whole-array figures rather than
# per-column ones.
print("\nInitial mean : ", s.tmean(x_train).round(2))
print("Initial Standard deviation :", round(x_train.std(), 2))

# scale() standardizes each column to zero mean and unit variance.
x_scaled = preprocessing.scale(x_train)
print("\nStadardized data :\n", x_scaled.round(2))
print("\nScaled mean : ", s.tmean(x_scaled).round(2))
print("\nScaled standard Deviation : ", round(x_scaled.std(), 2))