 
import time 
import numpy as np
from sklearn.linear_model import TheilSenRegressor
from sklearn.linear_model import LinearRegression,RANSACRegressor
from matplotlib.font_manager import FontProperties
import matplotlib.pyplot as plt


# Regressors to compare, as (display name, estimator) pairs. The stochastic
# estimators get a fixed random_state so repeated runs give the same fit.
estimators = [
    ('Theil-Sen', TheilSenRegressor(random_state=42)),
    ('RANSAC', RANSACRegressor(random_state=42)),
    ('OLS', LinearRegression()),
]
# Line colour used for each estimator, keyed by its display name.
colors = {
    'Theil-Sen': 'red',
    'RANSAC': 'green',
    'OLS': 'yellow',
}


# Generate 200 sample points whose outliers lie in the X (feature) direction.
n_samples = 200
# One tenth of the samples are turned into outliers below; deriving the count
# from n_samples keeps the 10% ratio if n_samples is changed.
n_outliers = n_samples // 10

np.random.seed(0)
# Linear model: y = 3 * x + 2 + noise, with noise drawn from N(0, 0.1 ** 2).
x = np.random.randn(n_samples)
noise = 0.1 * np.random.randn(n_samples)
y = 3 * x + 2 + noise

# Turn the last 10% of the points into outliers: pin their feature value to
# 9.9 and shift their targets by 22. An explicit start index is used because
# a negative slice (x[-0:]) would select the whole array when n_outliers is 0.
outlier_start = n_samples - n_outliers
x[outlier_start:] = 9.9
y[outlier_start:] += 22
# Column-vector form of x, shape (n_samples, 1), used as the feature matrix.
X = x[:, np.newaxis]


# Plot the samples and the regression line fitted by each estimator.
# Font used for the Chinese legend and title text. It is requested by family
# name instead of a hard-coded Windows file path, so the script does not fail
# at draw time on machines without C:\Windows\Fonts\SimHei.ttf. If none of
# the listed CJK families is installed, matplotlib falls back to its default
# font with a warning.
font = FontProperties(family=['SimHei', 'Microsoft YaHei', 'PingFang SC',
                              'Arial Unicode MS', 'Noto Sans CJK SC',
                              'WenQuanYi Micro Hei', 'sans-serif'])

plt.figure('TheilSenRegressor')
plt.scatter(x, y, color='indigo', marker='x', s=40)

# Two end points are enough to draw each fitted straight line.
line_x = np.array([-3, 10])
for name, estimator in estimators:
    # perf_counter() is a monotonic, high-resolution clock, which suits short
    # intervals better than the wall clock returned by time.time().
    t0 = time.perf_counter()
    estimator.fit(X, y)
    elapsed_time = time.perf_counter() - t0
    # predict() takes a 2-D feature matrix, hence the (2, 1) reshape.
    y_pred = estimator.predict(line_x.reshape(2, 1))
    plt.plot(line_x, y_pred, color=colors[name], linewidth=2,
             label='%s (拟合时间: %.2fs)' % (name, elapsed_time))

plt.axis('tight')
plt.legend(loc='best', prop=font)
plt.title("特征变量出现离群点时的回归模型比较", fontproperties=font)
plt.show()
 
