 
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# 1. Column names applied to the data file, in file order.
col_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
# 1.1 Load the diabetes data. The file's first row is skipped (skiprows=1)
#     and the names above are used as the column labels instead.
diabets = pd.read_csv(
    "mydiabetes.csv",
    header=None,
    names=col_names,
    skiprows=1,
)

# 2. Feature selection: every column except 'SkinThickness' and the
#    'Outcome' target, keeping the original column order.
_non_features = ('SkinThickness', 'Outcome')
feature_cols = [name for name in col_names if name not in _non_features]
X = diabets.loc[:, feature_cols]  # Features
y = diabets['Outcome']            # Target variable

# 3. Split the dataset into a training part and a test part (25% held out
#    for testing; random_state fixed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# 4. Build the logistic-regression classifier and fit it on the training
#    data (fit() returns the estimator itself, so the calls can be chained).
logreg = LogisticRegression(solver="liblinear").fit(X_train, y_train)

# 5. Predict on the test set and build the confusion matrix.
y_pred = logreg.predict(X_test)
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print(cnf_matrix)  # confusion matrix
print()

# 6. Print the evaluation metrics: accuracy, precision, recall (in that order).
print("*" * 37)
for _label, _score_fn in (
    ("准确率（Accuracy） ：", metrics.accuracy_score),
    ("查准率（Precision）：", metrics.precision_score),
    ("召回率（Recall）   ：", metrics.recall_score),
):
    print(_label, _score_fn(y_test, y_pred))
 
