 
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import HuberRegressor, Ridge
from matplotlib.font_manager import FontProperties
import matplotlib.pyplot as plt

# Build the training data for the regression models: 20 samples with a
# single feature, Gaussian noise and a constant bias of 100.
rng = np.random.RandomState(0)
X, y = make_regression(
    n_samples=20,
    n_features=1,
    noise=4.0,
    bias=100.0,
    random_state=0,
)

# Draw 4 outliers around the origin first (X before y, so the random
# stream is consumed in a fixed order), then push them away from the
# inlier cloud.
X_outliers = rng.normal(0, 0.5, size=(4, 1))
y_outliers = rng.normal(0, 2.0, size=4)

# Offsets are computed from the inliers only, before anything is merged.
x_far_right = X.max() + X.mean() / 4.
x_far_left = X.min() - X.mean() / 4.
y_far_low = y.min() - y.mean() / 4.
y_far_high = y.max() + y.mean() / 4.

# First two outliers: beyond the largest X, below the smallest y.
X_outliers[:2, :] += x_far_right
y_outliers[:2] += y_far_low
# Last two outliers: beyond the smallest X, above the largest y.
X_outliers[2:, :] += x_far_left
y_outliers[2:] += y_far_high

# Append the outliers after the inliers.
X = np.concatenate((X, X_outliers), axis=0)
y = np.hstack((y, y_outliers))


# Scatter the sample points (inliers and outliers) as blue dots.
plt.figure('HuberRegressor')
plt.plot(X, y, 'b.')

# Seven evenly spaced abscissae spanning the data, used to draw each
# fitted line.
x = np.linspace(X.min(), X.max(), 7)

# Fit one Huber regressor per epsilon value; each gets its own line style.
epsilon_values = [1.35, 1.5, 1.75, 1.9]
colors = ['r-', 'b-', 'y-', 'm-']
for line_style, epsilon in zip(colors, epsilon_values):
    huber = HuberRegressor(alpha=0.0, epsilon=epsilon).fit(X, y)
    # Evaluate the fitted line y = coef * x + intercept on the grid.
    fitted_line = huber.intercept_ + x * huber.coef_
    plt.plot(
        x,
        fitted_line,
        line_style,
        label="胡贝尔回归(epsilon：%s)" % epsilon,
    )


# Fit a ridge regression on the same data to contrast it with the Huber
# fits. With alpha=0.0 there is no L2 penalty, i.e. this is an ordinary
# least-squares fit.
# The former `normalize=True` argument is intentionally not passed: it was
# deprecated in scikit-learn 1.0 and removed in 1.2 (passing it raises
# TypeError there), and without a penalty it does not change the fitted
# line.
ridge = Ridge(alpha=0.0, random_state=0)
ridge.fit(X, y)
coef_ridge = ridge.coef_
# Evaluate the ridge line y = coef * x + intercept on the plotting grid.
coef_ = ridge.coef_ * x + ridge.intercept_


# Draw the figure.
# Font object used to render the Chinese title and legend labels.
# NOTE: this is a Windows-specific path; on other platforms point `fname`
# at an installed font that contains CJK glyphs.
font = FontProperties(fname='C:\\Windows\\Fonts\\SimHei.ttf')  # , size=16

plt.plot(x, coef_, 'g-', label="岭回归")

plt.title("胡贝尔回归与岭回归的比较", fontproperties=font)
plt.xlabel("X")
plt.ylabel("y")
# loc=0 lets matplotlib choose the "best" legend location.
plt.legend(loc=0, prop=font)
plt.show()
 
