# Train a LightGBM regression model and score it on the same data it was
# fitted on (in-sample MAE, MSE and R^2).
# data_process is defined elsewhere; presumably it returns the feature
# matrix and the target for train_n -- verify against its definition.
df_train_X, df_train_Y = data_process(train_n)
train_data = lgb.Dataset(df_train_X, label=df_train_Y)
param = {
    'num_leaves': 10,
    'num_trees': 50,
    'objective': 'regression',
}
gbm = lgb.train(param, train_data)
y_hat = gbm.predict(df_train_X)

# Residuals and squared errors are shared by the metrics below.
_residual = y_hat - df_train_Y
_squared_error = _residual ** 2

MAE = np.mean(abs(_residual))
MSE = np.mean(_squared_error)

# R^2 = 1 - (residual sum of squares) / (total sum of squares)
_total_ss = np.sum((df_train_Y - np.mean(df_train_Y)) ** 2)
R2 = 1 - np.sum(_squared_error) / _total_ss
# 1. Cross-validation with LightGBM
# Grid-search LightGBM hyper-parameters with 5-fold cross-validation,
# scored by negative mean absolute error, then print the per-candidate
# scores, the best parameter combination and its score.
# NOTE(review): 'num_trees' is a LightGBM alias for the number of boosting
# iterations, so it presumably overrides n_estimators=100 below -- confirm,
# and consider searching over 'n_estimators' directly.
param = {'num_leaves': [10, 20, 30, 40, 50], 'num_trees': [50, 100, 200, 250]}
gsearch = GridSearchCV(
    estimator=lgb.sklearn.LGBMRegressor(
        n_estimators=100, boosting_type='gbdt', objective='regression'),
    param_grid=param,
    scoring='neg_mean_absolute_error',
    cv=5,
    verbose=20,
)
gsearch.fit(df_train_X, df_train_Y)

# grid_scores_ was removed in scikit-learn 0.20; cv_results_ is its
# replacement. Build a (params, mean score, std) summary per candidate from
# cv_results_ when it exists, and fall back to grid_scores_ only on old
# releases that do not provide cv_results_.
_cv_results = getattr(gsearch, 'cv_results_', None)
if _cv_results is not None:
    _grid_scores = list(zip(
        _cv_results['params'],
        _cv_results['mean_test_score'],
        _cv_results['std_test_score'],
    ))
else:
    _grid_scores = gsearch.grid_scores_

# A single pre-formatted argument makes print(...) behave identically under
# Python 2 (statement) and Python 3 (function).
print('%s %s %s' % (_grid_scores, gsearch.best_params_, gsearch.best_score_))