 
# 寻找最优超参数
import numpy as np
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split 
    
# Banner announcing the KFold-based hyperparameter search.
print("使用KFold()，寻找最优超参数")
print("*" * 30)

# Load the scikit-learn diabetes dataset and pull out features and targets.
diabetes_Bunch = datasets.load_diabetes()
X, y = diabetes_Bunch.data, diabetes_Bunch.target

# Hold out 33% of the samples as the test split; the seed is fixed so the
# partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# 7-fold splitter with shuffling; the fixed seed keeps the folds reproducible.
kfold = KFold(n_splits=7, shuffle=True, random_state=0)

# Candidate ridge penalties and, in matching order, their mean validation R2.
alphaSet = [0.001, 0.005, 0.01, 0.1, 1, 10, 100]
result_scores = []

for alpha in alphaSet:
    fold_r2 = []

    for fit_idx, val_idx in kfold.split(X_train, y_train):
        # Fit a fresh ridge model on this fold's fitting portion only.
        fold_model = Ridge(alpha=alpha).fit(X_train[fit_idx], y_train[fit_idx])

        # Score the held-out portion with the coefficient of determination.
        val_pred = fold_model.predict(X_train[val_idx])
        fold_r2.append(metrics.r2_score(y_train[val_idx], val_pred))

    # One averaged score per alpha, stored at the same position as in alphaSet.
    mean_r2 = np.mean(fold_r2)
    result_scores.append(mean_r2)
    print("alpha = %7.3f" % (alpha), ",R2 = ", mean_r2)
    print("-" * 30)
    
# The best hyperparameter is the alpha whose mean R2 is the largest; when
# several entries share the maximum, the earliest one is kept.
iIndex, r2_Max = max(enumerate(result_scores), key=lambda pair: pair[1])
bestAlpha = alphaSet[iIndex]
print("最好的评分  ：", r2_Max)
print("最佳的alpha ：", bestAlpha)

# With the best alpha chosen, train the final model on the whole training split.
print("\n使用最佳超参数alpha和全部训练数据构建模型：")
bestRidge = Ridge(alpha=bestAlpha).fit(X_train, y_train)
print(bestRidge)

# Predict the test split and report the final R2.
y_pred = bestRidge.predict(X_test)
R2 = metrics.r2_score(y_test, y_pred)
print("最后的模型指标：", R2)
 