 
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline


# 1. Load the Boston housing data as a (features, target) pair and hold out
#    part of it as a test set.
# The original author notes that this dataset is reportedly no longer
# recommended because of racial-bias concerns.
# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases -- confirm the installed version still provides it.
boston_features_and_target = load_boston(return_X_y=True)
X, y = boston_features_and_target
# No random_state is passed here, so the split is not seeded by this script.
X_train, X_test, y_train, y_test = train_test_split(X, y)

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge

# Create the three processing stages: standardization, principal component
# analysis, and ridge regression.
scaler = StandardScaler()  # stage 1: feature scaling
pca = PCA()                # stage 2: dimensionality reduction
ridge = Ridge()            # stage 3: regressor

# Chain the stages into a single estimator. Fitting the pipeline replaces the
# manual sequence of scaler.fit_transform -> pca.fit_transform -> ridge.fit
# on the training data.
pipeline_steps = [
    ('scaler', scaler),
    ('reduce_dim', pca),
    ('regressor', ridge),
]
pipe = Pipeline(steps=pipeline_steps)

# Hyper-parameter grid for GridSearchCV. Each key has the form
# "<pipeline step name>__<parameter name>".
n_features_to_test = np.arange(1, 7)  # candidate PCA component counts: 1..6
# Ridge's regularization strength must be non-negative, so the integers
# -12..11 are used as exponents of 2 (a log-spaced grid from 2**-12 to 2**11)
# rather than being passed directly as alpha values.
alpha_to_test = 2.0 ** np.arange(-12, +12)
params = {
    'reduce_dim__n_components': n_features_to_test,
    'regressor__alpha': alpha_to_test,
}

# Grid search: cross-validated search over every parameter combination in
# `params`, fitted on the training split only.
from sklearn.model_selection import GridSearchCV
gridsearch = GridSearchCV(estimator=pipe, param_grid=params, verbose=1)
gridsearch.fit(X_train, y_train)

# Report the best hyper-parameter combination found on the training data.
best_combination = gridsearch.best_params_
print("\n基于训练数据集，搜索到的最佳超参数组合是：")
print(best_combination)

# Score the fitted search object on the held-out test split.
test_score = gridsearch.score(X_test, y_test)
print("\n基于测试数据集的评分是：", test_score)
 
