 
import os

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from sklearn.datasets import load_digits
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

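# Define a plotting helper that draws three charts:
# 1) the training and cross-validation learning curves;
# 2) fit time versus the number of training samples;
# 3) cross-validation score versus fit time.
# Parameters that need explanation:
# axes : array of shape (3,) holding the matplotlib Axes objects to draw on
# ylim : array of shape (2,) giving the lower and upper limits of the Y axis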
def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5), font=None):
    """Draw the learning curve, scalability and performance charts of a model.

    Three charts are drawn, one per entry of ``axes``:

    1. training score and cross-validation score vs. number of training
       samples (the learning curve);
    2. fit time vs. number of training samples;
    3. cross-validation score vs. fit time.

    Each curve is the mean over the second axis of the arrays returned by
    ``learning_curve`` (one column per cross-validation split, per the
    scikit-learn documentation); the shaded band is one standard deviation
    either side of that mean.

    Parameters
    ----------
    estimator : object
        Estimator handed unchanged to ``learning_curve``.
    title : str
        Title of the first chart.
    X, y : array-like
        Data and targets handed unchanged to ``learning_curve``.
    axes : sequence of 3 matplotlib Axes, optional
        Axes to draw on. When ``None`` a new 1x3 figure (20x5 inches) is
        created.
    ylim : (lower, upper), optional
        Y-axis limits of the first chart, unpacked into ``set_ylim``.
    cv, n_jobs, train_sizes
        Forwarded to ``learning_curve``.
    font : matplotlib.font_manager.FontProperties, optional
        Font used for titles, axis labels and the legend (the labels are
        Chinese, so a CJK-capable font is normally required).

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    if axes is None:
        _, axes = plt.subplots(1, 3, figsize=(20, 5))

    axes[0].set_title(title, fontproperties=font)
    if ylim is not None:
        axes[0].set_ylim(*ylim)
    axes[0].set_xlabel("训练样本个数", fontproperties=font)
    axes[0].set_ylabel("评分", fontproperties=font)

    # return_times=True makes learning_curve also return fit and score
    # times; the score times are not used here.
    train_sizes, train_scores, valid_scores, fit_times, _ = \
        learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs,
                       train_sizes=train_sizes,
                       return_times=True)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    valid_scores_mean = np.mean(valid_scores, axis=1)
    valid_scores_std = np.std(valid_scores, axis=1)
    fit_times_mean = np.mean(fit_times, axis=1)
    fit_times_std = np.std(fit_times, axis=1)

    # Chart 1: learning curve (training score in red, CV score in green).
    axes[0].grid()
    axes[0].fill_between(train_sizes, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.1,
                         color="r")
    axes[0].fill_between(train_sizes, valid_scores_mean - valid_scores_std,
                         valid_scores_mean + valid_scores_std, alpha=0.1,
                         color="g")
    axes[0].plot(train_sizes, train_scores_mean, 'o-', color="r",
                 label="训练评分")
    axes[0].plot(train_sizes, valid_scores_mean, 'o-', color="g",
                 label="交叉验证评分")
    axes[0].legend(loc="best", prop=font)

    # Chart 2: number of training samples vs. fit time.
    axes[1].grid()
    axes[1].plot(train_sizes, fit_times_mean, 'o-')
    axes[1].fill_between(train_sizes, fit_times_mean - fit_times_std,
                         fit_times_mean + fit_times_std, alpha=0.1)
    axes[1].set_xlabel("训练样本个数", fontproperties=font)
    axes[1].set_ylabel("训练时间", fontproperties=font)
    axes[1].set_title("模型伸缩性", fontproperties=font)

    # Chart 3: fit time vs. CV score. Mean fit times are measured values and
    # need not increase with the training size, so order the points by fit
    # time first; otherwise the line and the shaded band can fold back on
    # themselves.
    time_order = np.argsort(fit_times_mean)
    fit_times_sorted = fit_times_mean[time_order]
    valid_mean_sorted = valid_scores_mean[time_order]
    valid_std_sorted = valid_scores_std[time_order]
    axes[2].grid()
    axes[2].plot(fit_times_sorted, valid_mean_sorted, 'o-')
    axes[2].fill_between(fit_times_sorted,
                         valid_mean_sorted - valid_std_sorted,
                         valid_mean_sorted + valid_std_sorted, alpha=0.1)
    axes[2].set_xlabel("训练时间", fontproperties=font)
    axes[2].set_ylabel("评分", fontproperties=font)
    axes[2].set_title("模型的性能", fontproperties=font)

    return plt


# Main program
# Load the handwritten-digits dataset that ships with scikit-learn.
X, y = load_digits(return_X_y=True)

# 3 rows x 2 columns of axes: one column per model, one row per chart.
# figsize sets the figure size: 10 inches wide, 15 inches tall.
fig, axes = plt.subplots(3, 2, figsize=(10, 15))
plt.subplots_adjust(wspace=0.4, hspace=0.4)

# The chart labels are Chinese, so a CJK-capable font is required. Prefer the
# SimHei file at its usual Windows location. When that file is absent
# (non-Windows system, font not installed) look the font up by family name
# instead, so rendering does not fail on a missing font file; matplotlib
# picks the first listed family it can find.
font_path = 'C:\\Windows\\Fonts\\SimHei.ttf'
if os.path.isfile(font_path):
    font = FontProperties(fname=font_path)
else:
    font = FontProperties(family=['SimHei', 'Microsoft YaHei', 'PingFang SC',
                                  'Noto Sans CJK SC', 'WenQuanYi Micro Hei'])


title = "学习曲线(朴素贝叶斯)"
# 100 shuffle-split iterations (n_splits=100) give a fairly smooth curve.
cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

estimator = GaussianNB()
plot_learning_curve(estimator, title, X, y, axes=axes[:, 0], ylim=(0.7, 1.01),
                    cv=cv, n_jobs=4, font=font)

title = "学习曲线(SVM, 核函数RBF,γ=0.001)"
# Support vector classification (SVC) is comparatively slow to train, so
# fewer shuffle-split iterations are used here (n_splits=10).
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
estimator = SVC(gamma=0.001)
plot_learning_curve(estimator, title, X, y, axes=axes[:, 1], ylim=(0.7, 1.01),
                    cv=cv, n_jobs=4, font=font)

plt.show()
 