
from pathlib import Path

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale

# 1. Load the Boston house-price dataset (506 samples).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script needs an older scikit-learn release -- confirm the
# pinned version or migrate to another data source.
print("波士顿房价预测...")
boston = load_boston()
X, y = boston.data, boston.target

# 2. Split first, then z-score standardize with statistics estimated from the
# training rows only. Scaling the whole dataset before splitting would leak
# the test set's mean/variance into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15)

x_scaler = StandardScaler().fit(X_train)
X_train = x_scaler.transform(X_train)
X_test = x_scaler.transform(X_test)

# StandardScaler works on 2-D input, so the 1-D target is reshaped into a
# single column for scaling and flattened back afterwards.
y_scaler = StandardScaler().fit(y_train.reshape(-1, 1))
y_train = y_scaler.transform(y_train.reshape(-1, 1)).ravel()
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

# Fit a linear regressor by stochastic gradient descent. The loss function is
# left at its default (squared error); only the regularisation and step-size
# settings are overridden.
sgd_params = {
    "penalty": 'elasticnet',
    "alpha": 0.0001,
    "epsilon": 0.01,
    "eta0": 0.1,
}
sgdr = SGDRegressor(**sgd_params).fit(X_train, y_train)

# Metrics for the fitted linear model.
# Goodness of fit (R-squared) is measured on the training split.
score = sgdr.score(X_train, y_train)
print("拟合优度（R-squared）:", score)

# Mean squared error is measured on the held-out test split.
y_pred = sgdr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("      均方误差（MSE）:", mse)


# Plot observed vs. predicted target values for the test split.
plt.figure()

# A FontProperties object applies a CJK-capable font only to the text elements
# it is passed to, leaving the rest of the figure's styling untouched.
# The SimHei file path only exists on Windows installs that ship that font;
# passing a missing file as `fname` makes rendering fail, so fall back to a
# list of common CJK font family names and let matplotlib pick what it finds.
SIMHEI_PATH = 'C:\\Windows\\Fonts\\SimHei.ttf'
if Path(SIMHEI_PATH).is_file():
    font = FontProperties(fname=SIMHEI_PATH)  # , size=16
else:
    font = FontProperties(family=[
        'SimHei',
        'Microsoft YaHei',
        'PingFang SC',
        'Noto Sans CJK SC',
        'WenQuanYi Micro Hei',
        'sans-serif',
    ])

# One x position per test sample, in the (shuffled) order of the test split.
x_ax = range(len(y_test))
plt.plot(x_ax, y_test, label="观测值")
plt.plot(x_ax, y_pred, label="预测值")
plt.title("波士顿房价数据（测试和预测）", fontproperties=font)

plt.xlabel('样本序号', fontproperties=font)
plt.ylabel('房屋价格', fontproperties=font)

# The legend takes the font through `prop`, which also covers the CJK line
# labels given to plt.plot above.
plt.legend(loc='best', fancybox=True, shadow=True, prop=font)
plt.grid(True)
plt.show()
 
