Data - Preprocessing - Tools - Ipynb - Colaboratory
Data - Preprocessing - Tools - Ipynb - Colaboratory
ipynb - Colaboratory
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the raw table; every column except the last is a feature, the last is the target.
dataset = pd.read_csv('Data.csv')
# Take an explicit copy: X is modified in place later (missing-value imputation),
# and assigning into a plain iloc slice of `dataset` is chained assignment in
# pandas (SettingWithCopyWarning, and the write may or may not reach `dataset`).
X = dataset.iloc[:, :-1].copy()
Y = dataset.iloc[:, -1]
print(X)
print(Y)
0 No
1 Yes
2 No
3 No
4 Yes
5 Yes
6 No
7 Yes
8 No
9 Yes
# SimpleImputer was used without being imported; import it here, next to its use,
# like the other scikit-learn imports in this notebook.
from sklearn.impute import SimpleImputer

# Replace NaN entries in columns 1 and 2 of X with the mean of the respective column.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X.iloc[:, 1:3])
X.iloc[:, 1:3] = imputer.transform(X.iloc[:, 1:3])
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 0 and pass every other column through unchanged.
# (The original line was cut off at "remainder='passthr"; the closing of the
# string and the call are restored here.)
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)
# The encoded columns are placed first, followed by the passed-through columns.
X = np.array(ct.fit_transform(X))
print(X)
7.20000000e+04]
4.80000000e+04]
5.40000000e+04]
6.10000000e+04]
6.37777778e+04]
5.80000000e+04]
5.20000000e+04]
7.90000000e+04]
8.30000000e+04]
6.70000000e+04]]
from sklearn.preprocessing import LabelEncoder

# Map the target labels to integer codes: learn the label set first,
# then replace each label by its code.
le = LabelEncoder()
le.fit(Y)
Y = le.transform(Y)
print(Y)
[0 1 0 0 1 1 0 1 0 1]
Splitting the dataset into the Training set and Test set
https://colab.research.google.com/drive/1dRbDzNhckDa9llj_HNTGKg0EUrQN_E_k#printMode=true 2/4
3/7/22, 3:49 PM Copy of data_preprocessing_tools.ipynb - Colaboratory
Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes
# the shuffle (and therefore the split) reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)
print(X_train)
5.20000000e+04]
6.37777778e+04]
7.20000000e+04]
6.10000000e+04]
4.80000000e+04]
7.90000000e+04]
8.30000000e+04]
5.80000000e+04]]
# Show the integer-encoded target values of the training split.
print(Y_train)
[0 1 0 0 1 1 0 1]
# Show the held-out test features and their encoded target values.
print(X_test)
print(Y_test)
[0 1]
Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
# Only the columns from index 3 onward are standardized; columns 0-2 are left as is.
# The scaler is fitted on the training rows alone and then reused for the test
# rows, so no test-set statistics leak into the scaling.
sc.fit(X_train[:, 3:])
X_train[:, 3:] = sc.transform(X_train[:, 3:])
X_test[:, 3:] = sc.transform(X_test[:, 3:])
print(X_train)
[[ 0. 0. 1. -0.19159184 -1.07812594]
[ 0. 1. 0. -0.01411729 -0.07013168]
[ 1. 0. 0. 0.56670851 0.63356243]
https://colab.research.google.com/drive/1dRbDzNhckDa9llj_HNTGKg0EUrQN_E_k#printMode=true 3/4
3/7/22, 3:49 PM Copy of data_preprocessing_tools.ipynb - Colaboratory
[ 0. 0. 1. -0.30453019 -0.30786617]
[ 0. 0. 1. -1.90180114 -1.42046362]
[ 1. 0. 0. 1.14753431 1.23265336]
[ 0. 1. 0. 1.43794721 1.57499104]
[ 1. 0. 0. -0.74014954 -0.56461943]]
# Show the test features again, now with columns 3 onward standardized
# using the scaler fitted on the training rows.
print(X_test)
[[ 0. 1. 0. -1.46618179 -0.9069571 ]
[ 1. 0. 0. -0.44973664 0.20564034]]
https://colab.research.google.com/drive/1dRbDzNhckDa9llj_HNTGKg0EUrQN_E_k#printMode=true 4/4