import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
# Load the dataset; it must contain a target column named 'y'.
df = pd.read_csv('./data/xxx.csv')

# Separate the target column from the feature columns.
y = df['y']
X = df.drop('y', axis=1)

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Define the model: a decision tree capped at depth 16, seeded so that
# tie-breaking between equally good splits is reproducible.
dtr = DecisionTreeRegressor(max_depth=16, random_state=42)

# Fit on the training split only.
dtr.fit(X_train, y_train)

# Predict on the held-out test split.
dtr_pred = dtr.predict(X_test)

# Error metrics on the test split. The labels name the model actually used
# (they previously said "Ridge", which did not match the estimator).
print("DecisionTree-r2 score:", r2_score(y_test, dtr_pred))
print("DecisionTree-RMSE:", np.sqrt(mean_squared_error(y_test, dtr_pred)))

# Plot feature importances and the fitted tree on separate figures; drawing
# both on the same implicit axes would paint the tree over the bar chart.
plt.figure()
plt.bar(X_train.columns, dtr.feature_importances_)

plt.figure()
plot_tree(dtr, feature_names=list(X_train.columns))

# Display the figures when run as a plain script.
plt.show()
# No comments:
# Post a Comment