# @caoo
# 2018-12-14T11:32:21.000000Z
# Word count: 1454
# Views: 326
# IRIS
# Decision tree
# DecisionTreeClassifier
import random

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Fixing a random seed
random.seed(42)
# Iris classification with AdaBoost over a decision tree, tuned by grid search.
#
# Steps: load the data, hold out 20% as a test split, grid-search the
# hyper-parameters on the training split only, then report and plot the
# predictions for the held-out test split.

# The stdlib `random` seed does not reach scikit-learn (it draws from NumPy
# RNGs), so the seed is passed explicitly to every randomized component.
SEED = 42

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Print the dataset sizes
print('原始数据集特征:', X.shape,
      '训练数据集特征:', X_train.shape,
      '测试数据集特征:', X_test.shape)
print('原始数据集标签:', y.shape,
      '训练数据集标签:', y_train.shape,
      '测试数据集标签:', y_test.shape)

dtc = DecisionTreeClassifier(random_state=SEED)

# scikit-learn 1.2 renamed AdaBoost's `base_estimator` argument to
# `estimator` and later removed the old name. Try the new keyword first and
# fall back, so the script runs on both old and current releases. The nested
# parameter names in the grid must use the same prefix.
try:
    ada = AdaBoostClassifier(estimator=dtc, random_state=SEED)
    base_param = "estimator"
except TypeError:
    ada = AdaBoostClassifier(base_estimator=dtc, random_state=SEED)
    base_param = "base_estimator"

param_grid = {
    base_param + "__criterion": ["gini", "entropy"],
    base_param + "__splitter": ["best", "random"],
    "n_estimators": [1, 2],
}

# Fit the search on the training split only: tuning on the full data set
# would let the test rows influence model selection and inflate the scores
# reported below.
grid_search_ada = GridSearchCV(ada, param_grid=param_grid, cv=10)
grid_fit = grid_search_ada.fit(X_train, y_train)

# With the default refit=True, best_estimator_ has already been re-trained on
# the whole training split using the best parameters; no extra fit is needed.
best_clf = grid_fit.best_estimator_

# Make predictions using the tuned model.
best_train_predictions = best_clf.predict(X_train)
best_test_predictions = best_clf.predict(X_test)

# Number of test samples whose prediction matches the true label
print(sum(best_test_predictions == y_test))
print(metrics.classification_report(y_test, best_test_predictions))
print(metrics.confusion_matrix(y_test, best_test_predictions))

# Scatter the test samples on their first two feature columns, colored by
# predicted class.
plt.scatter(X_test[:, 0], X_test[:, 1], c=best_test_predictions, marker='x')
plt.title('DecisionTreeClassifier')
plt.show()