import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import plot_tree
# Data load
from sklearn.datasets import load_iris
# Load the iris dataset. `load_iris` is imported above but was never called,
# so `iris` was undefined and its first use below raised NameError.
iris = load_iris()

# Data separation: features as a DataFrame with named columns, labels as `y`.
X = pd.DataFrame(iris['data'], columns=iris['feature_names'])
y = iris['target']

# Split train : test = 7 : 3 with a fixed random seed so the split is
# reproducible; `stratify=y` asks for a split stratified on the class labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Define the model structure: a decision tree limited to depth 3, seeded so
# repeated runs build the same tree.
dtc = DecisionTreeClassifier(max_depth=3, random_state=42)

# Train the model on the training partition.
dtc.fit(X_train, y_train)

# Predict the classes of the held-out test partition.
dtc_pred = dtc.predict(X_test)

# Error metrics: confusion matrix and per-class precision / recall / F1.
print(confusion_matrix(y_test, dtc_pred))
print(classification_report(y_test, dtc_pred))

# Feature importances (a decision tree has no coefficients to inspect).
# Open an explicit figure so the bars never land on a figure that is already
# current, e.g. the tree figure left over from a previous interactive run.
plt.figure()
plt.bar(X_train.columns, dtc.feature_importances_)

# Plot the fitted tree on its own, larger figure.
plt.figure(figsize=(8, 8))
plot_tree(
    dtc,
    feature_names=iris['feature_names'],
    # Pass a plain list rather than a NumPy array.
    # NOTE(review): some scikit-learn releases appear to validate this
    # argument strictly and reject arrays - confirm against the pinned version.
    class_names=list(iris['target_names']),
    filled=True,
)

# Render both figures when run as a plain script; harmless in a notebook.
plt.show()