@caoo
2018-12-14T11:32:21.000000Z
字数 1454
阅读 383
IRIS 决策树 DecisionTreeClassifier
# Tune an AdaBoost ensemble of decision trees on the Iris dataset with a grid
# search, then evaluate the best model on a held-out test split and plot its
# predictions over the first two features.
import matplotlib.pyplot as plt
from sklearn import datasets, metrics
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# scikit-learn draws its randomness from NumPy-style random states, not from
# the stdlib `random` module, so the seed is passed explicitly as random_state
# to every estimator/splitter that uses randomness.
SEED = 42

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Report the dataset sizes.
print('原始数据集特征:', X.shape, '训练数据集特征:', X_train.shape, '测试数据集特征:', X_test.shape)
print('原始数据集标签:', y.shape, '训练数据集标签:', y_train.shape, '测试数据集标签:', y_test.shape)

# AdaBoost's `base_estimator` parameter was renamed to `estimator` in
# scikit-learn 1.2 and removed in 1.4. Pick whichever name the installed
# version exposes so the script runs on both old and new releases.
base_param = (
    "estimator"
    if "estimator" in AdaBoostClassifier().get_params()
    else "base_estimator"
)

dtc = DecisionTreeClassifier(random_state=SEED)
ada = AdaBoostClassifier(random_state=SEED, **{base_param: dtc})

param_grid = {
    f"{base_param}__criterion": ["gini", "entropy"],
    f"{base_param}__splitter": ["best", "random"],
    "n_estimators": [1, 2],
}

# Tune on the training split only: searching over the full dataset would let
# the test rows influence model selection and inflate the test scores below.
grid_search_ada = GridSearchCV(ada, param_grid=param_grid, cv=10)
grid_search_ada.fit(X_train, y_train)

# With the default refit=True, best_estimator_ has already been refit on the
# whole training split using the best parameters, so no extra fit is needed.
best_clf = grid_search_ada.best_estimator_

# Make predictions on the held-out test set using the tuned model.
best_test_predictions = best_clf.predict(X_test)

# Number of test samples whose prediction matches the true label.
print(sum(best_test_predictions == y_test))
print(metrics.classification_report(y_test, best_test_predictions))
print(metrics.confusion_matrix(y_test, best_test_predictions))

# Scatter the test samples over their first two feature columns, coloured by
# predicted class.
plt.scatter(X_test[:, 0], X_test[:, 1], c=best_test_predictions, marker='x')
plt.title('DecisionTreeClassifier')
plt.show()