机器学习回归算法ALG

  • Post author:
  • Post category:其他


机器学习回归算法



import numpy as np

import pandas as pd

from sklearn import datasets

from sklearn import preprocessing                    #预处理

from sklearn.model_selection import train_test_split #划分数据

from sklearn.model_selection import GridSearchCV     #网格搜索



#特征选择

from sklearn.linear_model import Lasso

from sklearn.linear_model import Ridge

from sklearn.ensemble import ExtraTreesRegressor



from sklearn.svm import SVR



from sklearn.tree import DecisionTreeRegressor                         #DT

from sklearn.ensemble import AdaBoostRegressor                    #Ada

from sklearn.ensemble import RandomForestRegressor           #RF

from sklearn.ensemble import GradientBoostingRegressor      #GBDT

from lightgbm import LGBMRegressor                                         #LGBM

from xgboost.sklearn import XGBRegressor                               #XGB



from sklearn.metrics import r2_score



import warnings

# Silence every warning category for the rest of the script.
warnings.filterwarnings('ignore')


def obtainFormatData():
    """Load the Boston housing data and min-max scale its features.

    Prints the number of samples and features between two separator lines.

    Returns:
        tuple: ``(x, y)`` where ``x`` is the feature matrix returned by
        ``MinMaxScaler().fit_transform`` and ``y`` is the dataset target.
    """
    # NOTE(review): load_boston is believed to be unavailable in recent
    # scikit-learn releases (1.2+) -- confirm against the installed version.
    dataset = datasets.load_boston()
    featuresMatrix, y = dataset.data, dataset.target

    print('======================================================')
    print('nums = {}        features = {}'.
          format(featuresMatrix.shape[0], featuresMatrix.shape[1]))
    print('======================================================')

    # Min-max normalisation: X = (X - min(X)) / (max(X) - min(X))
    x = preprocessing.MinMaxScaler().fit_transform(featuresMatrix)
    return x, y


# Feature selection
def lassoFS(x, y):
    """Print the features kept by Lasso (L1) at several regularisation strengths.

    For each alpha in ``[0.1, 0.2, 0.3]`` a ``Lasso`` model is fitted on
    ``(x, y)`` and the indices of the non-zero coefficients are printed.

    Args:
        x: Feature matrix passed to ``Lasso.fit``.
        y: Target values passed to ``Lasso.fit``.

    Returns:
        None. Results are only printed.
    """
    print('FS based on L1:')
    for a in [0.1, 0.2, 0.3]:
        clf = Lasso(alpha=a, random_state=1)
        clf.fit(x, y)
        # A feature counts as selected when its coefficient is not zero.
        index = list(np.where(clf.coef_ != 0)[0])
        print('A = {} FSI = {}'.format(a, index))

def ridgeFS(x, y):
    """Print the features kept by Ridge (L2) at several regularisation strengths.

    For each alpha in ``[0.1, 0.5, 0.8]`` a ``Ridge`` model is fitted on
    ``(x, y)`` and the indices of coefficients that are at least 0.1 are
    printed.

    Args:
        x: Feature matrix passed to ``Ridge.fit``.
        y: Target values passed to ``Ridge.fit``.

    Returns:
        None. Results are only printed.
    """
    print('FS based on L2:')
    for a in [0.1, 0.5, 0.8]:
        clf = Ridge(alpha=a, random_state=1)
        clf.fit(x, y)
        # NOTE(review): the threshold is applied to the signed coefficient,
        # so features with coefficients below 0.1 (including all negative
        # ones) are never listed -- confirm an absolute value is not intended.
        index = list(np.where(clf.coef_ >= 0.1)[0])
        print('A = {} FSI = {}'.format(a, index))

def treeModelFS(x, y):
    """Plot feature importances from an extra-trees regressor as a bar chart.

    Fits an ``ExtraTreesRegressor`` with 10 trees on ``(x, y)``, sorts its
    ``feature_importances_`` in descending order and draws them with the
    pandas bar-plot accessor.

    Args:
        x: Feature matrix passed to ``ExtraTreesRegressor.fit``.
        y: Target values passed to ``ExtraTreesRegressor.fit``.

    Returns:
        None. The plot is created as a side effect.
    """
    # NOTE(review): criterion='mse' is believed to be spelled
    # 'squared_error' in newer scikit-learn releases -- confirm against the
    # installed version.
    clf = ExtraTreesRegressor(n_estimators=10, criterion='mse',
                              max_depth=None, random_state=1)
    clf.fit(x, y)

    # The Series index is the feature's column position in x.
    scores = pd.Series(clf.feature_importances_).sort_values(ascending=False)
    scores.plot.bar(rot=0, figsize=(8, 3),
                    title='importance of features based on tree')


# Hyper-parameter search
def SVRRegressionALG(x_train, x_test, y_train, y_test):
    """Grid-search an SVR, refit it with the best parameters and print R2.

    The grid covers ``C``, ``kernel`` and ``epsilon``. The best combination
    is printed, a fresh ``SVR`` is trained with it on the training split, and
    the R2 scores on both splits (rounded to 4 decimals) are printed.

    Args:
        x_train: Training feature matrix.
        x_test: Test feature matrix.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        None. Results are only printed.
    """
    params = {
        'C': [0.01, 0.1, 1.0, 10],
        'kernel': ['linear', 'rbf', 'sigmoid'],
        'epsilon': [0.01, 0.05, 0.1],
    }
    clf = GridSearchCV(estimator=SVR(), param_grid=params)
    clf.fit(x_train, y_train)

    best = clf.best_params_
    print('SVM:', best)

    # The keys of `best` are exactly the grid keys (C, kernel, epsilon), so
    # they can be forwarded to the constructor as keyword arguments.
    m = SVR(**best)
    m.fit(x_train, y_train)

    predict_train = m.predict(x_train)
    predict_test = m.predict(x_test)
    train_r2 = round(r2_score(y_train, predict_train), 4)
    test_r2 = round(r2_score(y_test, predict_test), 4)
    print('R2: Train = {} Test = {}'.format(train_r2, test_r2))


# Tree-based models
def TreeModelRegressionALG(x_train, x_test, y_train, y_test):
    """Fit several tree-based regressors and print each one's train/test R2.

    The models are a decision tree, AdaBoost over decision trees, a random
    forest, gradient boosting, LightGBM and XGBoost. Each is fitted on the
    training split and scored with ``r2_score`` on both splits; scores are
    rounded to 4 decimals.

    Args:
        x_train: Training feature matrix.
        x_test: Test feature matrix.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        None. Results are only printed.
    """
    # NOTE(review): several argument values below (criterion='mse',
    # max_features='auto', loss='ls', objective='reg:linear') are believed to
    # be deprecated or renamed in newer scikit-learn / XGBoost releases --
    # confirm against the installed versions before upgrading.
    models = {
        'DT': DecisionTreeRegressor(criterion='mse', splitter='best',
                                    max_depth=None, min_samples_split=2,
                                    max_features=None, max_leaf_nodes=None,
                                    random_state=1),
        'Ada': AdaBoostRegressor(DecisionTreeRegressor(random_state=1),
                                 n_estimators=50,
                                 learning_rate=0.1),
        'RF': RandomForestRegressor(n_estimators=50, criterion='mse',
                                    max_depth=None, max_features='auto',
                                    min_samples_split=2, min_samples_leaf=1,
                                    random_state=1),
        'GBDT': GradientBoostingRegressor(loss='ls', n_estimators=100,
                                          learning_rate=0.1,
                                          subsample=0.9, max_features=None,
                                          min_samples_split=2,
                                          min_samples_leaf=1,
                                          random_state=1),
        'LGBM': LGBMRegressor(boosting_type='gbdt', num_leaves=30,
                              n_estimators=100, learning_rate=0.1,
                              objective='regression', min_child_samples=20,
                              max_depth=-1),
        'XGB': XGBRegressor(max_depth=3, learning_rate=0.1,
                            n_estimators=100, booster='gbtree',
                            subsample=1, objective='reg:linear',
                            reg_alpha=0, reg_lambda=1),
    }

    print('======================================================')
    for model, clf in models.items():
        clf.fit(x_train, y_train)
        predict_train = clf.predict(x_train)
        predict_test = clf.predict(x_test)
        train_r2 = round(r2_score(y_train, predict_train), 4)
        test_r2 = round(r2_score(y_test, predict_test), 4)
        print('model: {}\nR2: Train = {} Test = {}'.format(model, train_r2, test_r2))
        # NOTE(review): the original indentation was lost; the separator is
        # assumed to be printed after every model rather than once at the end.
        print('======================================================')


# Load the min-max scaled features and the targets.
x, y = obtainFormatData()

# Keep 20% of the samples as the test split; the seed fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)

# Evaluate the grid-searched SVR first, then the tree-based models, on the
# same split.
SVRRegressionALG(x_train, x_test, y_train, y_test)
TreeModelRegressionALG(x_train, x_test, y_train, y_test)



版权声明:本文为qinlan1994原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。