House 2
House 2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
# Load the Delhi housing listings. The path is a raw string so the
# backslashes of the Windows path are never parsed as escape sequences
# ("\P", "\W" and "\D" are invalid escapes and trigger a SyntaxWarning on
# Python 3.12+); the resulting path text is unchanged.
file = pd.read_csv(r"E:\PAML\Week_2\Delhi_v2.csv")
# read_csv already returns a DataFrame; the wrapper is kept so both `file`
# and `data` remain bound exactly as before.
data = pd.DataFrame(file)
# Notebook display of the loaded frame.
data
price Address
area \
0 5600000 Noida Extension, Noida, Delhi NCR
1350
1 8800000 Sector 79, Gurgaon, Delhi NCR
1490
2 16500000 Vaishali, Ghaziabad, Delhi NCR
2385
3 3810000 Link Road, F Block, Sector 50, Noida, Uttar Pr...
1050
4 6200000 Jaypee Pavilion Court Sector 128, Noida, Secto...
1350
... ... ... ..
.
7733 7900000 Indirapuram, Ghaziabad, Delhi NCR
1095
7734 4510000 Greater Noida, Sector 2, Greater Noida, Delhi NCR
1060
7735 7000000 Crossings Republik, Ghaziabad, Delhi NCR
1898
7736 6500000 Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...
1400
7737 6500000 sandal apartment, Shalimar Garden Extension 1,...
1750
Landmarks type_of_building \
0 NaN Flat
1 NaN Flat
2 NaN Flat
3 near Gaur Mulberry Mansion Flat
4 NaN Flat
... ... ...
7733 NaN Flat
7734 ek murti chowk Flat
7735 NaN Flat
7736 vvip mall Flat
7737 NaN Flat
desc Price_sqft
0 \n\n\n Welcome ... 4148.148148
1 \n\n\n Mapsko M... 5906.040268
2 \n\n\n This pro... 6918.238994
3 \n\n\n AIG Roya... 3628.571429
4 \n\n\n The prop... 4592.592593
... ... ...
7733 \n \n \n... 7214.611872
7734 \n \n \n... 4254.716981
7735 \n \n \n... 3688.092729
7736 \n \n \n... 4642.857143
7737 \n 3714.285714
# Two alternative ways of discarding missing data, kept side by side:
# one frame without any column that holds a NaN, one without any row that
# holds a NaN. `data` itself is left untouched.
data_dropna_cols = data.dropna(axis="columns")
data_dropna = data.dropna(axis="index")
# Notebook display of the original, unfiltered frame.
data
price Address
area \
0 5600000 Noida Extension, Noida, Delhi NCR
1350
1 8800000 Sector 79, Gurgaon, Delhi NCR
1490
2 16500000 Vaishali, Ghaziabad, Delhi NCR
2385
3 3810000 Link Road, F Block, Sector 50, Noida, Uttar Pr...
1050
4 6200000 Jaypee Pavilion Court Sector 128, Noida, Secto...
1350
... ... ... ..
.
7733 7900000 Indirapuram, Ghaziabad, Delhi NCR
1095
7734 4510000 Greater Noida, Sector 2, Greater Noida, Delhi NCR
1060
7735 7000000 Crossings Republik, Ghaziabad, Delhi NCR
1898
7736 6500000 Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...
1400
7737 6500000 sandal apartment, Shalimar Garden Extension 1,...
1750
Landmarks type_of_building \
0 NaN Flat
1 NaN Flat
2 NaN Flat
3 near Gaur Mulberry Mansion Flat
4 NaN Flat
... ... ...
7733 NaN Flat
7734 ek murti chowk Flat
7735 NaN Flat
7736 vvip mall Flat
7737 NaN Flat
desc Price_sqft
0 \n\n\n Welcome ... 4148.148148
1 \n\n\n Mapsko M... 5906.040268
2 \n\n\n This pro... 6918.238994
3 \n\n\n AIG Roya... 3628.571429
4 \n\n\n The prop... 4592.592593
... ... ...
7733 \n \n \n... 7214.611872
7734 \n \n \n... 4254.716981
7735 \n \n \n... 3688.092729
7736 \n \n \n... 4642.857143
7737 \n 3714.285714
# Print dtype and non-null count per column for the unfiltered frame.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB
# Same summary for the row-wise dropna() result, to see how many rows survive.
data_dropna.info()
<class 'pandas.core.frame.DataFrame'>
Index: 329 entries, 10 to 7709
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 329 non-null int64
1 Address 329 non-null object
2 area 329 non-null int64
3 latitude 329 non-null float64
4 longitude 329 non-null float64
5 Bedrooms 329 non-null int64
6 Bathrooms 329 non-null int64
7 Balcony 329 non-null float64
8 Status 329 non-null object
9 neworold 329 non-null object
10 parking 329 non-null float64
11 Furnished_status 329 non-null object
12 Lift 329 non-null float64
13 Landmarks 329 non-null object
14 type_of_building 329 non-null object
15 desc 329 non-null object
16 Price_sqft 329 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 46.3+ KB
# Same summary for the column-wise dropna(axis=1) result, to see which columns survive.
data_dropna_cols.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 neworold 7738 non-null object
8 type_of_building 7738 non-null object
9 desc 7738 non-null object
10 Price_sqft 7738 non-null float64
dtypes: float64(3), int64(4), object(4)
memory usage: 665.1+ KB
# Feature matrix: remove the target, every text (object) column and every
# column with missing values, so only complete numeric columns remain.
# NOTE(review): `Price_sqft` is kept as a feature; together with `area` it
# looks like it encodes the target (price / area) - confirm this is intended.
X = data.drop(
    columns=[
        'price',
        'Address',
        'Status',
        'neworold',
        'Lift',
        'parking',
        'Balcony',
        'Furnished_status',
        'Landmarks',
        'type_of_building',
        'desc',
    ],
)
# Regression target.
y = data['price']
# Re-check `data` after building X: drop() was not called in place, so the
# source frame is expected to be unchanged.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB
# Hold out part of the data for evaluation. The fit below reads
# X_train / y_train, but no split appears before it in this file.
# NOTE(review): the hold-out size (scikit-learn default) and the seed are
# assumptions - confirm them against the original notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Support vector regressor with a linear kernel, fitted on the training rows.
supportvector = SVR(kernel='linear')
supportvector.fit(X_train, y_train)
# Predictions for the held-out rows, reused by the plot further down.
y_pred = supportvector.predict(X_test)
# Notebook display of the regressor's score on the held-out rows.
supportvector.score(X_test, y_test)
0.8789697105261775
# Overlay the held-out prices and the model's predictions on one figure,
# both plotted against their position in the test set.
actual = y_test.to_numpy()
for series, colour, tag in (
    (actual, 'black', 'Actual'),
    (y_pred, 'red', 'Predicted'),
):
    plt.plot(series, color=colour, label=tag)
plt.legend()
plt.show()
# Second experiment: work on a copy of `data` rather than the frame itself.
df1 = data.copy()
# Row-wise and column-wise NaN-free variants, rebuilt for reference.
data_dropna = data.dropna(axis="index")
df1_dropna_colums = df1.dropna(axis="columns")
# Print dtype and non-null count per column for the copy.
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB
# Repeat of the summary for `df1` (dtype and non-null count per column).
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB
# Feature matrix for the second run: remove the target, every text (object)
# column and every column with missing values from the copy `df1`.
# NOTE(review): `Price_sqft` is kept as a feature; together with `area` it
# looks like it encodes the target (price / area) - confirm this is intended.
X1 = df1.drop(
    columns=[
        'price',
        'Address',
        'Status',
        'neworold',
        'Lift',
        'parking',
        'Balcony',
        'Furnished_status',
        'Landmarks',
        'type_of_building',
        'desc',
    ],
)
# Regression target.
y1 = df1['price']
# Re-check `df1` after building X1: drop() was not called in place, so the
# copy is expected to be unchanged.
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB
# Hold out part of the data for evaluation. The fit below reads
# X1_train / y1_train, but no split appears before it in this file.
# NOTE(review): the hold-out size (scikit-learn default) and the seed are
# assumptions - confirm them against the original notebook.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, random_state=42
)

# Support vector regressor with a linear kernel, fitted on the training rows.
svr1 = SVR(kernel='linear')
svr1.fit(X1_train, y1_train)
# Predictions for the held-out rows, reused by the plot further down.
y1_pred = svr1.predict(X1_test)
# Notebook display of the regressor's score on the held-out rows.
svr1.score(X1_test, y1_test)
0.8789697105261775
# Overlay the held-out prices and the second model's predictions on one
# figure, both plotted against their position in the test set.
actual1 = y1_test.to_numpy()
for series, colour, tag in (
    (actual1, 'red', 'Actual Price'),
    (y1_pred, 'green', 'Predicted values'),
):
    plt.plot(series, color=colour, label=tag)
plt.legend()
plt.show()