import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
# Install ...
!pip install xgboost
# Read the raw dataset from disk.
df = pd.read_csv('./data/xxx.csv')

# Features are every column except the target column 'y'.
X = df.drop(columns='y')
y = df['y']

# Hold out 30% of the rows for testing (train:test = 7:3); the fixed seed
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Base estimator with a fixed seed for reproducibility.
xgbr = XGBRegressor(random_state=42)

# Hyper-parameter grid to search exhaustively:
# 4 * 4 * 2 * 3 * 3 = 288 candidate combinations.
params = {
    "max_depth": [3, 5, 7, 9],
    "n_estimators": [50, 70, 90, 100],
    "learning_rate": [0.03, 0.01],
    "subsample": [0.7, 0.8, 0.9],
    "colsample_bytree": [0.7, 0.8, 0.9],
    # Regularisation terms, currently excluded from the search:
    # "reg_alpha": [0, 1],
    # "reg_lambda": [0, 1],
}

# 5-fold cross-validated grid search, ranking candidates by R^2.
grid = GridSearchCV(
    xgbr,
    params,
    scoring="r2",
    cv=5,
    n_jobs=-1,  # use all available CPU cores
    verbose=2,
)
# Run the cross-validated search on the training split.
grid.fit(X_train, y_train)

# Report the winning hyper-parameters. A bare `grid.best_params_` expression
# is only echoed when it is the last line of a notebook cell and is silently
# discarded otherwise, so print it explicitly.
print("Best parameters:", grid.best_params_)

# Predict on the held-out test split. With GridSearchCV's default
# `refit=True`, this uses the best estimator refitted on the training data.
best_pred = grid.predict(X_test)

# Test-set error metrics: coefficient of determination and root mean squared error.
print("r2 score:", r2_score(y_test, best_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, best_pred)))
# No comments:
# Post a Comment