LightGBM (parameter optimization)
LightGBMのパラメータチューニングを行っていきましょう。
1. Dataset
今回はKeras (keras.datasets) に含まれるボストンの住宅価格データセットを利用します。
# Library imports
from keras.datasets import boston_housing
from sklearn.model_selection import train_test_split

# Fetch the dataset; load_data() returns a (train, test) pair of (X, y) tuples
(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

# Carve 20% out of the training portion to serve as the early-stopping
# validation set (fixed seed so the split is reproducible)
X_train, X_eval, y_train, y_eval = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=7,
)

# Extra keyword arguments handed to fit() later on
eval_data = {'eval_set': [(X_eval, y_eval)]}
2. Parameter (NOT hyperparameter)
# Fixed parameters: passed to the model as-is and NOT included in the search
params = {
    'objective': 'regression',     # regression task
    'boosting_type': 'gbdt',       # gradient boosted decision trees
    'random_state': 7,             # seed for reproducibility
    'metric': 'rmse',              # metric reported on the validation set
    'n_estimators': 10000,         # large upper bound; early stopping ends training sooner
    'verbose': 1,                  # LightGBM logging level
    'early_stopping_round': 10,    # rounds without validation improvement before stopping
    'learning_rate': 0.05,
}
3. Hyperparameter
# Hyperparameter grid: every combination of these candidates is searched
# (8 parameters x 3 candidates each = 3**8 = 6561 combinations)
cv_params = {
    'max_depth': [5, 7, 9],                # limit on tree depth
    'num_leaves': [15, 31, 63],            # maximum leaves per tree
    'reg_alpha': [0.1, 0.5, 1.0],          # L1 regularization
    'reg_lambda': [0.1, 0.5, 1.0],         # L2 regularization
    'colsample_bytree': [0.3, 0.7, 1.0],   # fraction of features sampled per tree
    'subsample': [0.3, 0.7, 1.0],          # fraction of rows sampled when bagging
    'subsample_freq': [0, 5, 10],          # bagging interval in iterations (0 disables bagging)
    'min_child_samples': [10, 20, 30],     # minimum number of samples in a leaf
}
4. Model making
# Create the LightGBM regressor from the fixed (non-tuned) settings in `params`
import lightgbm as lgb
model = lgb.LGBMRegressor(**params)
5. Grid search
from sklearn.model_selection import GridSearchCV

# Exhaustive search over cv_params; scoring and cv are left at their defaults
gridcv = GridSearchCV(estimator=model, param_grid=cv_params)

# The extra keyword arguments in eval_data (the eval_set) are passed to fit()
# alongside the training data
gridcv.fit(X_train, y_train, **eval_data)

# Show and keep the best parameters and the corresponding score
best_params = gridcv.best_params_
best_score = gridcv.best_score_
print(f'最適パラメータ {best_params}\nスコア {best_score}')

# Grab the best model found by the search
best_model = gridcv.best_estimator_
6. Prediction and evaluation
# Predict on the test set
y_pred = best_model.predict(X_test)

# Evaluate
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

print(mean_absolute_error(y_test, y_pred))  # MAE

# MSE is computed once and reused to derive RMSE
mse = mean_squared_error(y_test, y_pred)
print(mse)  # MSE
print(np.sqrt(mse))  # RMSE

# R^2; best_model.score(X_test, y_test) has the same meaning
print(r2_score(y_test, y_pred))
7. Referenced page