DecisionTreeのパラメータを調整する。
まずはMaxDepthから
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score

# Sweep the decision tree's max_depth and report the leave-one-out
# accuracy for each candidate depth.
MAX_DEPTH = 20
depths = range(1, MAX_DEPTH)  # candidates are 1 .. MAX_DEPTH - 1

loo_Y = d_train["Survived"].values
loo_X = d_train[["Pclass", "Sex", "Age", "Fare", "Parch", "Embarked", "SibSp"]].values

# The splitter takes no parameters, so a single instance serves every depth.
loo = LeaveOneOut()

# accuracy_scores[i] is the leave-one-out accuracy for depths[i].
accuracy_scores = []
for depth in depths:
    predicted_labels = []
    for train_index, test_index in loo.split(loo_X):
        X_train, X_test = loo_X[train_index], loo_X[test_index]
        # Index the same target array that was defined above (loo_Y); the
        # previous spelling `loo_y` referred to a name this code never creates.
        y_train, y_test = loo_Y[train_index], loo_Y[test_index]
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(X_train, y_train)
        predicted_label = clf.predict(X_test)
        # predict() returns a length-1 array for the single held-out row;
        # store the scalar so predicted_labels stays one-dimensional.
        predicted_labels.append(predicted_label[0])
    score = accuracy_score(loo_Y, predicted_labels)
    accuracy_scores.append(score)
    print('max depth={0}: {1}'.format(depth, score))
max depth=1: 0.7867564534231201
max depth=2: 0.6936026936026936
max depth=3: 0.8181818181818182
max depth=4: 0.8237934904601572
max depth=5: 0.8181818181818182
max depth=6: 0.8103254769921436
max depth=7: 0.8215488215488216
max depth=8: 0.8249158249158249
max depth=9: 0.8204264870931538
max depth=10: 0.8148148148148148
max depth=11: 0.8058361391694725
max depth=12: 0.8002244668911336
max depth=13: 0.797979797979798
max depth=14: 0.7934904601571269
max depth=15: 0.7912457912457912
max depth=16: 0.7755331088664422
max depth=17: 0.77665544332211
max depth=18: 0.7833894500561167
max depth=19: 0.7744107744107744

上記の結果で正解率が最も高かった（約0.825）ため、MaxDepthは8を利用する。