0% found this document useful (0 votes)
9 views

fds assign 3

Uploaded by

Komal Rathod
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
9 views

fds assign 3

Uploaded by

Komal Rathod
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/4

SET A)

import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as s
# Load the raw dataset; the steps below read its 'Age' and 'Salary' columns.
df = pd.read_csv('data.csv')

# 1. Summary statistics, dimensions and a preview of the first three rows.
print("\n")
print("Statistical Data : \n", df.describe())
print("\n")
print("Shape : ", df.shape)
print("\n")
print("Top 3 Head : \n", df.head(3))
print("\n")

# 2. Missing-value handling.
# Boolean mask: True wherever a cell is missing.
print(df.isnull())
# dropna() returns a new frame without the incomplete rows; df itself is not
# modified, so the mean imputation below still operates on every row.
print("\nDrop rows value having 0 \n", df.dropna())

# Replace missing values with the column mean. The filled column is assigned
# back rather than calling fillna(..., inplace=True) on the column selection:
# that chained form acts on an intermediate object, raises a FutureWarning in
# pandas 2.x and leaves df unchanged once Copy-on-Write is enabled.
# (Filling every gap with one constant via df.fillna(...) is the alternative.)
mean = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean)
print("\n\n", df)

mean = df['Salary'].mean()
df['Salary'] = df['Salary'].fillna(mean)
print("\n\n", df)
# 3. Encode the categorical columns 'Purchased' and 'Country'.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# One-hot encode 'Purchased'. The encoder output is converted with .toarray()
# before being wrapped in a DataFrame (its columns get default integer labels).
enc = OneHotEncoder(handle_unknown='ignore')
enc_df = pd.DataFrame(enc.fit_transform(df[['Purchased']]).toarray())
print(" One Hot : \n ", enc_df)

# Label-encode 'Country' and then 'Purchased', overwriting each column with
# the encoder's integer codes. A fresh encoder is created per column.
labelencoder = LabelEncoder()
df['Country'] = labelencoder.fit_transform(df['Country'])
print("\n\n", df)
labelencoder = LabelEncoder()
df['Purchased'] = labelencoder.fit_transform(df['Purchased'])
print("\n\n", df)

print("\n\nLevel Encoder :\n", df)

print("\nJoined Country & Salary :\n")

# Append the one-hot columns to the frame. join() matches rows by index;
# NOTE(review): this assumes df still carries the default 0..n-1 index that
# enc_df has -- confirm if rows are ever dropped or re-indexed upstream.
df = df.join(enc_df)
print(df)

SET B)
import pandas as pd
import numpy as np
from sklearn import preprocessing
import scipy.stats as s
# Build the demo frame used by the preprocessing steps: three integer
# columns (c01, c02, c03) with four rows each.
d = dict(
    c01=[1, 3, 7, 4],
    c02=[12, 2, 7, 1],
    c03=[22, 34, -11, 9],
)
df2 = pd.DataFrame(data=d)
print("\n Original data values")
print(df2)

# Rescaling: min-max scale df2 with feature_range=(0, 1) and print the
# result rounded to two decimals.
print("\n\ndata scaling betweeen 0 to 1")
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scaler.fit_transform(df2)
print("\nMin Max Scaled data")
print(data_scaled.round(2))

# L1 normalization rescales each row so that the absolute values in the row
# sum to 1 (rows containing negatives therefore need not sum to exactly 1).
dn = preprocessing.normalize(df2, norm='l1')
print("\nL1 Normalized Data ")
print(dn.round(2))

# Binarize data: threshold df2 at 5 so every entry becomes either 0 or 1.
data_binarized = preprocessing.Binarizer(threshold=5).transform(df2)
print("\nBinarized data")
print(data_binarized)

# Standardize data: rescale a small 3x3 sample with preprocessing.scale and
# compare its mean / standard deviation before and after.
print("\nStandardize data")
x_train = np.array([
    [1., -1., 2.],
    [2., 0., 0.],
    [0., 1., -1.],
])
print("\nOriginal Data \n", x_train)
# No axis is passed below, so each statistic is a single number computed
# over all nine values rather than per column.
print("\nInitial mean : ", s.tmean(x_train).round(2))
print("Initial Standard deviation :", round(x_train.std(), 2))

x_scaled = preprocessing.scale(x_train)
print("\nStadardized data :\n", x_scaled.round(2))
print("\nScaled mean : ", s.tmean(x_scaled).round(2))
print("\nScaled standard Deviation : ", round(x_scaled.std(), 2))

You might also like