 
import os

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.model_selection import train_test_split

# Load the handwritten-digits dataset as a (features, labels) pair.
X, y = datasets.load_digits(return_X_y=True)

# Estimators to compare. Some entries are the same estimator class configured
# with different parameters; the first tuple element is the plot legend label.
classifiers = [
    ("SGD", SGDClassifier(max_iter=100)),
    ("ASGD", SGDClassifier(average=True)),
    ("Perceptron", Perceptron()),
    (
        "hinge loss",
        PassiveAggressiveClassifier(loss='hinge', C=1.0, tol=1e-4),
    ),
    (
        "squared_hinge loss",
        PassiveAggressiveClassifier(loss='squared_hinge', C=1.0, tol=1e-4),
    ),
    (
        "SAG",
        # Regularisation strength is scaled by the number of samples.
        LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]),
    ),
]

# Fractions of the data held out for testing, and the number of random
# train/test splits averaged for each fraction.
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
rounds = 20

# x-axis values for the plot: the complementary fraction used for training.
xx = 1. - np.asarray(heldout)

# For every classifier, estimate the mean test error at each held-out fraction
# and draw one curve of error versus training fraction.
for name, clf in classifiers:
    print("training %s" % name)
    # A fresh generator with the same seed per classifier means every
    # classifier is evaluated on the same sequence of random splits.
    split_rng = np.random.RandomState(42)
    mean_errors = []
    for test_fraction in heldout:
        split_errors = []
        for _ in range(rounds):
            split = train_test_split(
                X, y, test_size=test_fraction, random_state=split_rng)
            X_train, X_test, y_train, y_test = split
            # fit() returns the estimator itself, so the calls can be chained.
            predicted = clf.fit(X_train, y_train).predict(X_test)
            accuracy = np.mean(predicted == y_test)
            split_errors.append(1 - accuracy)
        # Average the error over all random splits for this fraction.
        mean_errors.append(np.mean(split_errors))

    plt.plot(xx, mean_errors, label=name)

# Build a font object so that pyplot can render the Chinese axis labels.
# The SimHei font file below only exists on Windows. Passing a missing file to
# FontProperties(fname=...) makes matplotlib fail when the text is rendered, so
# on other systems fall back to a list of CJK-capable font families and let
# matplotlib pick whichever one is installed.
simhei_path = 'C:\\Windows\\Fonts\\SimHei.ttf'
if os.path.isfile(simhei_path):
    font = FontProperties(fname=simhei_path)
else:
    font = FontProperties(family=[
        'SimHei',
        'Microsoft YaHei',
        'Noto Sans CJK SC',
        'WenQuanYi Micro Hei',
        'PingFang SC',
        'Arial Unicode MS',
        'sans-serif',
    ])

plt.legend(loc="upper right", prop=font)
plt.xlabel("训练数据集比例", fontproperties=font)
plt.ylabel("测试误差", fontproperties=font)
plt.show()
 
