 
import os

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from sklearn import linear_model, datasets

# Problem size: total number of points, and how many of the leading points
# are overwritten with outliers.
n_samples = 1000
n_outliers = 50

# Synthetic single-feature regression data; coef=True additionally returns
# the ground-truth coefficient used to generate y.
X, y, coef = datasets.make_regression(
    n_samples=n_samples,
    n_features=1,
    n_informative=1,
    noise=10,
    coef=True,
    random_state=0,
)

# Manufacture outliers: replace the first n_outliers points with a cluster
# centred near (3, -3).
# NOTE(review): this seeds NumPy's *global* RNG, so anything later in the
# script that draws from the global stream is affected as well -- presumably
# including the RANSAC fit, which is given no random_state. Keep the seed and
# the order of the two draws as they are.
np.random.seed(0)
outlier_X = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
outlier_y = -3 + 10 * np.random.normal(size=n_outliers)
X[:n_outliers] = outlier_X
y[:n_outliers] = outlier_y

# Ordinary least-squares regression fitted on all of the data, outliers
# included.
lr = linear_model.LinearRegression().fit(X, y)

# Robust regression with RANSAC (random sample consensus). No estimator is
# passed, so the regressor uses its default one (a LinearRegression,
# according to the original author's note -- verify against the installed
# scikit-learn version).
ransac = linear_model.RANSACRegressor().fit(X, y)
inlier_mask = ransac.inlier_mask_
# Everything RANSAC did not classify as an inlier is treated as an outlier.
outlier_mask = ~inlier_mask

# Evaluate both fitted models on a column of x values that starts at the
# smallest x and advances in unit steps while staying below the largest x.
line_X = np.arange(X.min(), X.max()).reshape(-1, 1)
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)

# Compare the estimated coefficients with the true one.
print("Estimated coefficients (true, linear regression, RANSAC):")
print(coef, lr.coef_, ransac.estimator_.coef_)

# Obtain a font object that can render the Chinese legend labels.
# SimHei is normally installed at this path on Windows. The path used to be
# passed to FontProperties unconditionally, which makes drawing the legend
# fail on any machine where the file does not exist. Use the file when it is
# present; otherwise look the font up by family name, which lets matplotlib
# pick a substitute font instead of raising (CJK glyphs may then be missing
# if no suitable font is installed).
font_path = 'C:\\Windows\\Fonts\\SimHei.ttf'
if os.path.isfile(font_path):
    font = FontProperties(fname=font_path)
else:
    font = FontProperties(family=['SimHei', 'sans-serif'])

plt.figure()

# Points RANSAC kept as inliers, together with the RANSAC regression line.
plt.scatter(X[inlier_mask], y[inlier_mask], color='yellowgreen', marker='.', label='内点(Inliers)')
plt.plot(line_X, line_y_ransac, color='green', linewidth=2, label='RANSAC评估')

# Points RANSAC rejected as outliers, together with the plain least-squares
# line fitted on all of the data.
plt.scatter(X[outlier_mask], y[outlier_mask], color='gold', marker='.', label='离群点(Outliers)')
plt.plot(line_X, line_y, color='gold', linewidth=2, label='最小二乘法回归')

# The legend is the only text containing non-ASCII labels, so it is the only
# element that needs the CJK-capable font.
plt.legend(loc='best', prop=font)
plt.xlabel("X")
plt.ylabel("y")

plt.axis('tight')
plt.show()
 
