【Python】波士顿房价预测

会的全对٩(ˊᗜˋ*)و

于 2025-07-20 13:29:51 发布

阅读量350

点赞数 7

CC 4.0 BY-SA版权

分类专栏： Python 文章标签： python 开发语言经验分享机器学习

本文链接：https://blog.csdn.net/2201_75406816/article/details/149469245

Python 专栏收录该内容

8 篇文章

订阅专栏

要求:

波士顿房价预测是一个经典的机器学习任务，类似于程序员世界的“Hello World”。

请利用机器学习方法完成波士顿房价的预测。

代码实现：

特征描述：

CRIM: 城镇人均犯罪率

ZN: 住宅用地所占比例

INDUS: 城镇中非零售商业用地所占比例

CHAS: 查尔斯河虚拟变量（地块临河取 1，否则取 0），用于回归分析

NOX: 一氧化氮浓度（单位：千万分之一）

RM: 每栋住宅的房间数

AGE: 1940 年以前建成的自住单位的比例

DIS: 距离 5 个波士顿的就业中心的加权距离

RAD: 距离高速公路的便利指数

TAX: 每一万美元的不动产税率

PTRATIO: 城镇中的学生与教师人数之比

B: 城镇中的黑人比例

LSTAT: 地区人口中低社会经济地位人群所占的百分比

MEDV: 自住房屋房价中位数（单位：千美元）

import numpy as np
import matplotlib.pyplot as plt
import pandas as pa

#建立线性回归模型
class linereturn():
    """Linear regression fitted by batch gradient descent with an L2 penalty.

    After ``fites`` has run, ``theta`` holds the learned weights as a column
    vector of shape ``(n_features + 1, 1)``. The last entry is the bias: it
    multiplies the column of ones appended to the inputs.
    """

    def fites(self, Datein, DateY, learnin=0.5, lamda=0.03,
              max_steps=int(1e8), tol=1e-8, patience=10, log_every=50):
        """Fit ``theta`` to the training data by gradient descent.

        Args:
            Datein: 2-D array-like of shape ``(n_samples, n_features)``.
            DateY: targets, either ``(n_samples,)`` or ``(n_samples, 1)``.
            learnin: learning rate (step size) of each update.
            lamda: L2 penalty coefficient applied to every entry of
                ``theta``, bias included.
            max_steps: hard upper bound on the number of updates.
            tol: minimum decrease of the cost that counts as an improvement.
            patience: number of steps without improvement tolerated before
                the loop stops early.
            log_every: print the cost every ``log_every`` steps; a falsy
                value disables printing.

        Raises:
            ValueError: if there are no samples, or the number of targets
                differs from the number of samples.
        """
        features = np.asarray(Datein, dtype=float)
        samplen, propertyn = features.shape
        if samplen == 0:
            raise ValueError("cannot fit a model on zero samples")

        # Force a column vector: a 1-D target would otherwise broadcast
        # against the (n, 1) prediction into an (n, n) residual matrix.
        targets = np.asarray(DateY, dtype=float).reshape(-1, 1)
        if targets.shape[0] != samplen:
            raise ValueError(
                "expected %d targets, got %d" % (samplen, targets.shape[0])
            )

        # Trailing column of ones so the bias is learned as part of theta.
        DateX = np.c_[features, np.ones(samplen)]

        self.theta = np.zeros([propertyn + 1, 1])
        best_step = 0
        best_cost = np.inf

        for step in range(max_steps):
            residual = DateX.dot(self.theta) - targets
            # Half mean squared error, computed before this step's update.
            # The L2 penalty is not part of the reported cost.
            cost = float(np.sum(residual ** 2)) / (2 * samplen)
            self.theta -= learnin * (
                lamda * self.theta + DateX.T.dot(residual) / samplen
            )
            if cost < best_cost - tol:
                best_cost = cost
                best_step = step
            elif step - best_step > patience:
                # No meaningful improvement for more than `patience` steps.
                break
            if log_every and step % log_every == 0:
                print("step %s: %.6f" % (step, cost))

    def predicted(self, X_input):
        """Return predictions of shape ``(n_samples, 1)`` for ``X_input``.

        ``X_input`` must have the same number of feature columns as the data
        passed to ``fites``, and ``fites`` must have been called first so
        that ``theta`` exists.
        """
        inputs = np.asarray(X_input, dtype=float)
        samplen = inputs.shape[0]
        X = np.c_[inputs, np.ones(samplen)]
        return X.dot(self.theta)
    
# Min-max scale each feature column to the [0, 1] range.
def standard(Xin):
    """Min-max scale every column of ``Xin`` to the range [0, 1].

    Args:
        Xin: NumPy array whose columns are features (reduced along axis 0).

    Returns:
        A tuple ``(X, MaxX, MinX)``: the scaled array, the per-column
        maxima and the per-column minima of the input.
    """
    MinX = Xin.min(axis=0)
    MaxX = Xin.max(axis=0)
    span = MaxX - MinX
    # A constant column has zero span; dividing by 1 instead maps it to 0
    # rather than producing NaN from 0 / 0.
    safe_span = np.where(span == 0, 1, span)
    X = (Xin - MinX) / safe_span
    return X, MaxX, MinX

#将数据集中的样本属性进行分割并制作X和Y矩阵
def propertyl(pd_data):
    """Split whitespace-separated rows into a feature matrix and a target.

    Every row of ``pd_data`` is expected to hold, in its first column, one
    string of whitespace-separated numbers. The last number of each row is
    the target; the preceding ones are the features.

    Args:
        pd_data: pandas DataFrame with at least one row; only column 0 is
            read.

    Returns:
        A tuple ``(X, Y)`` of float arrays with shapes
        ``(n_rows, n_fields - 1)`` and ``(n_rows, 1)``.
    """
    rown = pd_data.shape[0]
    # The field count is taken from the first row; every other row must
    # have the same number of fields.
    columnn = len(pd_data.iloc[0, 0].split())
    X = np.empty([rown, columnn - 1])
    Y = np.empty([rown, 1])
    for i in range(rown):
        fields = [float(value) for value in pd_data.iloc[i, 0].split()]
        X[i] = fields[:-1]
        Y[i] = fields[-1]
    return X, Y

#在主函数中让训练集进行分离，并对X进行归一化处理，最后绘图
if __name__ == "__main__":
    # Load the raw rows, split them into features and target, scale the
    # features, fit the model, then report the error and plot the fit.
    # NOTE(review): read_csv treats the first line as a header by default;
    # confirm the file really starts with a header row, otherwise the first
    # sample is dropped.
    data = pa.read_csv(r"C:\housing-data.csv")
    DateX, DateY = propertyl(data)
    StandardDateX, Maxx, Minx = standard(DateX)

    model = linereturn()
    model.fites(StandardDateX, DateY)

    # The model is evaluated on the same samples it was fitted on, so this
    # is the half mean squared error on the training data.
    Dpredict = model.predicted(StandardDateX)
    Dpredict_error = float(np.sum((Dpredict - DateY) ** 2)) / (2 * StandardDateX.shape[0])
    # %.6f instead of %d: the error is a float and %d would truncate it.
    print("Test error is %.6f" % Dpredict_error)
    print(model.theta)

    # Real prices and predictions plotted against the sample index.
    t = np.arange(len(Dpredict))
    plt.figure(facecolor='w')
    plt.plot(t, DateY, 'c-', lw=1.6, label=u'real price')
    plt.plot(t, Dpredict, 'm-', lw=1.6, label=u'estimate price')
    plt.legend(loc='best')
    plt.title(u'Boston house price', fontsize=18)
    plt.xlabel(u' ', fontsize=15)
    plt.ylabel(u'house price', fontsize=15)
    plt.grid()
    plt.show()