import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
# 1. Load the breast-cancer dataset and split it into train/test sets.
data = load_breast_cancer()
X = data.data
y = data.target
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# 2. Train an entropy-based decision tree.
# NOTE: scikit-learn's DecisionTreeClassifier is an optimized CART
# implementation, not C4.5. criterion='entropy' selects splits by information
# gain (entropy reduction), not by C4.5's gain ratio, so this is only an
# approximation of C4.5's behavior.
# random_state is fixed because the tree builder permutes features at each
# split; without it, equally good splits may be resolved differently from run
# to run, which would defeat the reproducible data split above.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X_train, y_train)
# 3. Evaluate: accuracy is the fraction of test samples predicted correctly.
y_pred = clf.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print("分类准确率:", accuracy)
# 4. Visualize the fitted tree.
# The resolution is set on the figure: plot_tree() has no `dpi` parameter and
# raises TypeError if one is passed. With no `ax` given, plot_tree draws on
# the current axes, i.e. on the figure created here.
plt.figure(figsize=(15, 10), dpi=150)
# Pass the names as plain lists; some scikit-learn releases reject NumPy
# arrays for feature_names/class_names during parameter validation.
plot_tree(clf, feature_names=list(data.feature_names), class_names=list(data.target_names), filled=True, rounded=True)
plt.show()