 
# Search for the best Ridge hyperparameter (alpha) using K-fold cross-validation
import numpy as np
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
import sklearn.metrics as metrics

print("使用KFold()，寻找最优超参数")
print("*" * 30)

# Load the diabetes regression dataset bundled with scikit-learn.
diabetes_Bunch = datasets.load_diabetes()
X, y = diabetes_Bunch.data, diabetes_Bunch.target

# 7-fold cross-validation; the fixed random_state makes the shuffle reproducible.
kfold = KFold(n_splits=7, shuffle=True, random_state=0)

# Candidate ridge penalties (alpha) to evaluate.
alphaSet = [0.001, 0.005, 0.01, 0.1, 1, 10, 100]
# Mean R2 per candidate, stored in the same order as alphaSet.
result_scores = []


def _fold_r2(alpha, train_idx, test_idx):
    """Fit a ridge model on the training rows and return its R2 on the test rows."""
    model = Ridge(alpha=alpha)
    model.fit(X[train_idx], y[train_idx])
    return metrics.r2_score(y[test_idx], model.predict(X[test_idx]))


for alpha in alphaSet:
    # One R2 value per cross-validation fold for this alpha.
    R2_scores = [
        _fold_r2(alpha, train_idx, test_idx)
        for train_idx, test_idx in kfold.split(X, y)
    ]

    mean_r2 = np.mean(R2_scores)
    result_scores.append(mean_r2)
    print("alpha = %7.3f" % (alpha), ",R2 = ", mean_r2)
    print("-" * 30)

# The alpha with the highest mean R2 is taken as the best hyperparameter;
# max() keeps the first maximum, so ties resolve to the earliest candidate.
iIndex, r2_Max = max(enumerate(result_scores), key=lambda pair: pair[1])
print()
print("最佳评分是：", r2_Max)
print("对应alpha ：", alphaSet[iIndex])
 
