0% found this document useful (0 votes)
12 views

House 2

Uploaded by

snigenigmatic972
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views

House 2

Uploaded by

snigenigmatic972
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 11

import numpy as np

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
# Load the housing listings CSV. The path is a raw string so the Windows
# backslashes can never be interpreted as escape sequences.
file = pd.read_csv(r"E:\PAML\Week_2\Delhi_v2.csv")
# read_csv already returns a DataFrame; the wrap is kept so that both
# `file` and `data` stay defined exactly as before.
data = pd.DataFrame(file)
data

price Address
area \
0 5600000 Noida Extension, Noida, Delhi NCR
1350
1 8800000 Sector 79, Gurgaon, Delhi NCR
1490
2 16500000 Vaishali, Ghaziabad, Delhi NCR
2385
3 3810000 Link Road, F Block, Sector 50, Noida, Uttar Pr...
1050
4 6200000 Jaypee Pavilion Court Sector 128, Noida, Secto...
1350
... ... ... ..
.
7733 7900000 Indirapuram, Ghaziabad, Delhi NCR
1095
7734 4510000 Greater Noida, Sector 2, Greater Noida, Delhi NCR
1060
7735 7000000 Crossings Republik, Ghaziabad, Delhi NCR
1898
7736 6500000 Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...
1400
7737 6500000 sandal apartment, Shalimar Garden Extension 1,...
1750

latitude longitude Bedrooms Bathrooms Balcony


Status \
0 28.608850 77.460560 3 3 NaN Under
Construction
1 28.374236 76.952416 3 3 NaN Ready
to Move
2 28.645769 77.385110 4 5 NaN Ready
to Move
3 28.566914 77.436434 2 2 3.0
NaN
4 28.520732 77.356491 2 2 3.0 Ready
to Move
... ... ... ... ... ...
...
7733 28.635272 77.370395 2 2 NaN Ready
to Move
7734 28.581431 77.452819 2 2 3.0
NaN
7735 28.625850 77.435336 4 3 5.0 Ready
to Move
7736 28.701622 77.430153 3 3 2.0 Ready
to Move
7737 28.693590 77.344376 3 2 3.0 Ready
to Move

neworold parking Furnished_status Lift \


0 New Property NaN NaN 2.0
1 New Property NaN Semi-Furnished 2.0
2 New Property 1.0 Unfurnished NaN
3 New Property 1.0 Unfurnished 2.0
4 Resale 1.0 NaN 3.0
... ... ... ... ...
7733 Resale NaN NaN NaN
7734 Resale NaN Semi-Furnished NaN
7735 Resale NaN NaN NaN
7736 Resale 1.0 NaN 3.0
7737 New Property NaN NaN NaN

Landmarks type_of_building \
0 NaN Flat
1 NaN Flat
2 NaN Flat
3 near Gaur Mulberry Mansion Flat
4 NaN Flat
... ... ...
7733 NaN Flat
7734 ek murti chowk Flat
7735 NaN Flat
7736 vvip mall Flat
7737 NaN Flat

desc Price_sqft
0 \n\n\n Welcome ... 4148.148148
1 \n\n\n Mapsko M... 5906.040268
2 \n\n\n This pro... 6918.238994
3 \n\n\n AIG Roya... 3628.571429
4 \n\n\n The prop... 4592.592593
... ... ...
7733 \n \n \n... 7214.611872
7734 \n \n \n... 4254.716981
7735 \n \n \n... 3688.092729
7736 \n \n \n... 4642.857143
7737 \n 3714.285714

[7738 rows x 17 columns]


Dropping rows that contain null values and, separately, dropping columns that contain null values

# Two independent missing-value clean-ups, both derived from `data`:
#   data_dropna      - rows containing at least one NaN removed
#   data_dropna_cols - columns containing at least one NaN removed
# `data` itself is not reassigned, so the frame displayed below is the
# original one.
data_dropna = data.dropna(axis="index")
data_dropna_cols = data.dropna(axis="columns")
data

price Address
area \
0 5600000 Noida Extension, Noida, Delhi NCR
1350
1 8800000 Sector 79, Gurgaon, Delhi NCR
1490
2 16500000 Vaishali, Ghaziabad, Delhi NCR
2385
3 3810000 Link Road, F Block, Sector 50, Noida, Uttar Pr...
1050
4 6200000 Jaypee Pavilion Court Sector 128, Noida, Secto...
1350
... ... ... ..
.
7733 7900000 Indirapuram, Ghaziabad, Delhi NCR
1095
7734 4510000 Greater Noida, Sector 2, Greater Noida, Delhi NCR
1060
7735 7000000 Crossings Republik, Ghaziabad, Delhi NCR
1898
7736 6500000 Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...
1400
7737 6500000 sandal apartment, Shalimar Garden Extension 1,...
1750

latitude longitude Bedrooms Bathrooms Balcony


Status \
0 28.608850 77.460560 3 3 NaN Under
Construction
1 28.374236 76.952416 3 3 NaN Ready
to Move
2 28.645769 77.385110 4 5 NaN Ready
to Move
3 28.566914 77.436434 2 2 3.0
NaN
4 28.520732 77.356491 2 2 3.0 Ready
to Move
... ... ... ... ... ...
...
7733 28.635272 77.370395 2 2 NaN Ready
to Move
7734 28.581431 77.452819 2 2 3.0
NaN
7735 28.625850 77.435336 4 3 5.0 Ready
to Move
7736 28.701622 77.430153 3 3 2.0 Ready
to Move
7737 28.693590 77.344376 3 2 3.0 Ready
to Move

neworold parking Furnished_status Lift \


0 New Property NaN NaN 2.0
1 New Property NaN Semi-Furnished 2.0
2 New Property 1.0 Unfurnished NaN
3 New Property 1.0 Unfurnished 2.0
4 Resale 1.0 NaN 3.0
... ... ... ... ...
7733 Resale NaN NaN NaN
7734 Resale NaN Semi-Furnished NaN
7735 Resale NaN NaN NaN
7736 Resale 1.0 NaN 3.0
7737 New Property NaN NaN NaN

Landmarks type_of_building \
0 NaN Flat
1 NaN Flat
2 NaN Flat
3 near Gaur Mulberry Mansion Flat
4 NaN Flat
... ... ...
7733 NaN Flat
7734 ek murti chowk Flat
7735 NaN Flat
7736 vvip mall Flat
7737 NaN Flat

desc Price_sqft
0 \n\n\n Welcome ... 4148.148148
1 \n\n\n Mapsko M... 5906.040268
2 \n\n\n This pro... 6918.238994
3 \n\n\n AIG Roya... 3628.571429
4 \n\n\n The prop... 4592.592593
... ... ...
7733 \n \n \n... 7214.611872
7734 \n \n \n... 4254.716981
7735 \n \n \n... 3688.092729
7736 \n \n \n... 4642.857143
7737 \n 3714.285714

[7738 rows x 17 columns]

# Summarise the raw frame: per-column non-null counts and dtypes.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB

# Summarise the frame left after removing every row that had a NaN.
data_dropna.info()

<class 'pandas.core.frame.DataFrame'>
Index: 329 entries, 10 to 7709
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 329 non-null int64
1 Address 329 non-null object
2 area 329 non-null int64
3 latitude 329 non-null float64
4 longitude 329 non-null float64
5 Bedrooms 329 non-null int64
6 Bathrooms 329 non-null int64
7 Balcony 329 non-null float64
8 Status 329 non-null object
9 neworold 329 non-null object
10 parking 329 non-null float64
11 Furnished_status 329 non-null object
12 Lift 329 non-null float64
13 Landmarks 329 non-null object
14 type_of_building 329 non-null object
15 desc 329 non-null object
16 Price_sqft 329 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 46.3+ KB

# Summarise the frame left after removing every column that had a NaN.
data_dropna_cols.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 neworold 7738 non-null object
8 type_of_building 7738 non-null object
9 desc 7738 non-null object
10 Price_sqft 7738 non-null float64
dtypes: float64(3), int64(4), object(4)
memory usage: 665.1+ KB

# Feature matrix: drop the target plus every text column and every column
# with missing values, leaving area, latitude, longitude, Bedrooms,
# Bathrooms and Price_sqft.
# NOTE(review): in the rows displayed, Price_sqft equals price / area, so
# together with area it reveals the target - confirm this is intended.
X = data.drop(
    columns=[
        'price', 'Address', 'Status', 'neworold', 'Lift', 'parking',
        'Balcony', 'Furnished_status', 'Landmarks', 'type_of_building',
        'desc',
    ],
)
# Target vector: the listing price.
y = data['price']

# Re-check the raw frame; DataFrame.drop above returned a new object, so
# `data` still has all 17 columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Re-attach the target to the training features for inspection.
train_data = X_train.join(y_train)
train_data

area latitude longitude Bedrooms Bathrooms Price_sqft


price
1226 1870 28.558498 77.392630 3 4 6417.112299
12000000
4528 1300 28.411186 77.049708 2 2 6538.461538
8500000
6218 1356 28.502597 77.428742 3 2 4793.510324
6500000
1512 1650 28.635710 77.365599 3 3 5454.545455
9000000
6634 1414 28.642848 77.382637 3 3 3818.953324
5400000
... ... ... ... ... ... ...
...
5226 875 28.432094 77.506766 2 2 3657.142857
3200000
5390 1600 28.417834 77.367077 3 3 4187.500000
6700000
860 840 28.685449 77.509226 2 2 3690.476190
3100000
7603 1150 28.474501 77.553187 2 2 4347.826087
5000000
7270 1595 28.604693 77.434898 3 2 5391.849530
8600000

[6190 rows x 7 columns]

# Train a linear-kernel support vector regressor on the training split.
# fit() returns the estimator itself, so construction and fitting chain.
supportvector = SVR(kernel='linear').fit(X_train, y_train)
# Predictions for the held-out rows (reused by the plot further down).
y_pred = supportvector.predict(X_test)
# Coefficient of determination (R^2) on the held-out rows.
supportvector.score(X_test, y_test)

0.8789697105261775

# Overlay held-out prices and model predictions, both in test-set row order.
actual = y_test.to_numpy()
for _values, _colour, _label in (
    (actual, 'black', 'Actual'),
    (y_pred, 'red', 'Predicted'),
):
    plt.plot(_values, color=_colour, label=_label)
plt.legend()
plt.show()

Creating a copy of the data, dropping the columns that contain nulls from the copy, and dropping the rows that contain nulls from the original

# Work on a copy so that `data` is left as loaded.
df1 = data.copy()
# Copy with every NaN-containing column removed.
# NOTE(review): "colums" is a typo for "columns"; the name is kept because
# later cells may refer to it.
df1_dropna_colums = df1.dropna(axis="columns")
# Rows with any NaN removed, taken from the original frame (not the copy).
data_dropna = data.dropna(axis="index")
# dropna returned new frames, so df1 still reports all 17 columns.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB

# Repeat summary of the copied frame; nothing has modified df1 since the
# previous call.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB

# Feature matrix built from the copy: drop the target plus every text
# column and every column with missing values, leaving area, latitude,
# longitude, Bedrooms, Bathrooms and Price_sqft.
# NOTE(review): in the rows displayed, Price_sqft equals price / area, so
# together with area it reveals the target - confirm this is intended.
X1 = df1.drop(
    columns=[
        'price', 'Address', 'Status', 'neworold', 'Lift', 'parking',
        'Balcony', 'Furnished_status', 'Landmarks', 'type_of_building',
        'desc',
    ],
)
# Target vector: the listing price.
y1 = df1['price']
# drop() returned a new frame, so df1 itself still has all 17 columns.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7738 entries, 0 to 7737
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 7738 non-null int64
1 Address 7738 non-null object
2 area 7738 non-null int64
3 latitude 7738 non-null float64
4 longitude 7738 non-null float64
5 Bedrooms 7738 non-null int64
6 Bathrooms 7738 non-null int64
7 Balcony 5166 non-null float64
8 Status 7164 non-null object
9 neworold 7738 non-null object
10 parking 2612 non-null float64
11 Furnished_status 4124 non-null object
12 Lift 1733 non-null float64
13 Landmarks 2759 non-null object
14 type_of_building 7738 non-null object
15 desc 7738 non-null object
16 Price_sqft 7738 non-null float64
dtypes: float64(6), int64(4), object(7)
memory usage: 1.0+ MB

# Same 80/20 split and seed as used for X / y, applied to the copy's data.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1,
    y1,
    test_size=0.2,
    random_state=42,
)
# Re-attach the target to the training features for inspection.
train_data_1 = X1_train.join(y1_train)
train_data_1

area latitude longitude Bedrooms Bathrooms Price_sqft


price
1226 1870 28.558498 77.392630 3 4 6417.112299
12000000
4528 1300 28.411186 77.049708 2 2 6538.461538
8500000
6218 1356 28.502597 77.428742 3 2 4793.510324
6500000
1512 1650 28.635710 77.365599 3 3 5454.545455
9000000
6634 1414 28.642848 77.382637 3 3 3818.953324
5400000
... ... ... ... ... ... ...
...
5226 875 28.432094 77.506766 2 2 3657.142857
3200000
5390 1600 28.417834 77.367077 3 3 4187.500000
6700000
860 840 28.685449 77.509226 2 2 3690.476190
3100000
7603 1150 28.474501 77.553187 2 2 4347.826087
5000000
7270 1595 28.604693 77.434898 3 2 5391.849530
8600000
[6190 rows x 7 columns]

# Train a linear-kernel support vector regressor on the copy's training
# split. fit() returns the estimator itself, so the calls chain.
svr1 = SVR(kernel='linear').fit(X1_train, y1_train)
# Predictions for the held-out rows (reused by the plot further down).
y1_pred = svr1.predict(X1_test)
# Coefficient of determination (R^2) on the held-out rows.
svr1.score(X1_test, y1_test)

0.8789697105261775

# Overlay held-out prices and model predictions, both in test-set row order.
actual1 = y1_test.to_numpy()
for _values, _colour, _label in (
    (actual1, 'red', 'Actual Price'),
    (y1_pred, 'green', 'Predicted values'),
):
    plt.plot(_values, color=_colour, label=_label)
plt.legend()
plt.show()

You might also like