 
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression


# Build a synthetic classification dataset (returned as a DataFrame).
# The parameter N is the number of samples to generate.
def getClassficationData( N ):
  """Generate N random samples with three integer features and a class label.

  For each sample, x1 is drawn from [0, 10), x2 from [0, 20) and x3 from
  [0, 30) using NumPy's global random state. The label y is derived from the
  feature sum: "high" when the sum is greater than 30, "low" when it is
  less than 15, and "normal" otherwise.

  Args:
    N: Number of samples (rows) to generate. A value of 0 (or less) yields
      an empty frame that still has the four columns.

  Returns:
    A pandas DataFrame with columns ['x1', 'x2', 'x3', 'y'], one row per
    sample, indexed 0..N-1.
  """
  columns = ['x1', 'x2', 'x3', 'y']
  rows = []
  for _ in range(N):
    # Draw per sample, in x1/x2/x3 order, so a seeded run consumes the
    # global random stream in exactly the same order as a row-by-row build.
    x1 = np.random.randint(10)
    x2 = np.random.randint(20)
    x3 = np.random.randint(30)

    total = x1 + x2 + x3
    if total > 30:
      y = "high"
    elif total < 15:
      y = "low"
    else:
      y = "normal"

    rows.append((x1, x2, x3, y))

  # Build the frame once from the collected rows: enlarging a DataFrame one
  # row at a time reallocates on every insert, and building it in one go lets
  # pandas infer integer dtypes for the feature columns.
  return pd.DataFrame(rows, columns=columns)
# end of getClassficationData() ....


# Generate the synthetic classification dataset (100 samples) and separate
# the feature columns from the target column.
df = getClassficationData(100)
X = df[['x1', 'x2', 'x3']]
Y = df[['y']]

# The target y is categorical (string labels), so encode it as integers
# with LabelEncoder before handing it to the classifiers.
le = LabelEncoder()
labels_flat = Y.to_numpy().ravel()   # single-column frame -> 1-D label array
le.fit(labels_flat)
y = le.transform(labels_flat)
del labels_flat                      # temporary only; keep the namespace as before

# Hold out a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

# Bagging (bootstrap aggregating) with a decision tree as the base learner.
dtc = DecisionTreeClassifier(criterion="entropy")
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, whereas the first positional parameter is the base
# learner in every version.
bag_model = BaggingClassifier(dtc, n_estimators=100, bootstrap=True)
bag_model = bag_model.fit(X_train, y_train)

# Predict on the held-out data and report accuracy plus the confusion matrix.
y_test_pred = bag_model.predict(X_test)
score = bag_model.score(X_test, y_test)
print( "决策树集成学习模型" )
print( "模型评分：", score )
matrix = confusion_matrix(y_test, y_test_pred)
print( "混淆矩阵：", matrix )
print( "*"*37 )

# Compare several different base estimators (weak learners) inside the same
# bagging setup and report the accuracy of each.
lr  = LogisticRegression()
bnb = BernoulliNB()
gnb = GaussianNB()

base_methods = [lr, bnb, gnb, dtc]
for bm in base_methods:
  print("基础模型: ", bm)
  # Positional base learner for cross-version compatibility (see above).
  bag_model = BaggingClassifier(bm, n_estimators=100, bootstrap=True)
  bag_model = bag_model.fit(X_train, y_train)
  y_test_pred = bag_model.predict(X_test)

  score = bag_model.score(X_test, y_test)
  print( "模型评分：", score )

  matrix = confusion_matrix(y_test, y_test_pred)
  print( "混淆矩阵：", matrix )
  print( "-"*37 )
# end of for ...
 
