随机森林填补乳腺癌数据的n_estimators参数调优
# Tune the n_estimators hyperparameter of a random forest classifier on the
# breast cancer dataset by comparing mean 10-fold cross-validation scores,
# then plot score against n_estimators.
# NOTE(review): load_breast_cancer, RandomForestClassifier, cross_val_score
# and plt are not imported in the visible code -- presumably they come from
# scikit-learn and matplotlib.pyplot; confirm the imports exist elsewhere.
data = load_breast_cancer()

# Candidate values for n_estimators: 10, 20, ..., 120.
n_estimators_range = [i * 10 for i in range(1, 13)]
score_list = []

# Observe how the mean cross-validation score changes with n_estimators.
# The loop body must be indented: each candidate gets its own classifier,
# is scored, and contributes exactly one entry to score_list (so that
# score_list stays index-aligned with n_estimators_range).
for n in n_estimators_range:
    rfc = RandomForestClassifier(n_estimators=n, n_jobs=1, random_state=90)
    score = cross_val_score(rfc, data.data, data.target, cv=10).mean()
    score_list.append(score)

# Find the highest score and the n_estimators value that produced it.
# list.index returns the first occurrence, so on a tie the smallest
# n_estimators value wins.
max_score = max(score_list)
max_index = score_list.index(max_score)
optimal_n_estimators = n_estimators_range[max_index]

# Plot the score curve (the y axis is labelled "accuracy").
plt.plot(n_estimators_range, score_list)
plt.xlabel("n_estimators")
plt.ylabel("accuracy")
plt.title("Impact of n_estimators on accuracy")
plt.show()
下载地址
用户评论