 
import numpy as np
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.utils import shuffle

def getStopReason(stopCode: str) -> str:
    """Translate a self-training termination code into a readable message.

    Args:
        stopCode: Termination code reported by the classifier, e.g. the
            value of ``SelfTrainingClassifier.termination_condition_``.

    Returns:
        A Chinese sentence describing why the iteration stopped. Codes
        other than ``"no_change"`` and ``"max_iter"`` fall through to the
        "all unlabeled data has been labeled" message.
    """
    if stopCode == "no_change":
        return "已经不能再生成新的标签。"
    if stopCode == "max_iter":
        return "已经达到最大迭代次数。"
    # The message previously read "为标记" (a homophone typo); the intended
    # word is "未标记" (unlabeled).
    return "已经标注所有未标记数据。"

# end of getStopReason()


# Load the breast-cancer data set (569 samples, 30 feature variables) and
# shuffle it reproducibly so the labeled subset is not order-dependent.
X, y = datasets.load_breast_cancer(return_X_y=True)
X, y = shuffle(X, y, random_state=42)

# Keep an untouched copy of the labels before masking them below.
y_true = y.copy()

# Only the first 50 samples keep their target value; the remaining 519
# samples are set to -1, i.e. treated as unlabeled.
y[50:] = -1

# Base classifier: an SVC with probability estimates enabled, wrapped in a
# self-training classifier using a confidence threshold of 0.7.
base_clf = SVC(
    probability=True,
    gamma=0.001,
    random_state=42,
)
self_trn_clf = SelfTrainingClassifier(base_clf, threshold=0.7)
self_trn_clf.fit(X, y)

# Report the fitted attributes one by one. Each attribute is looked up only
# when its line is printed, so the output order matches a sequence of
# individual print calls. (labeled_iter_ is intentionally not printed.)
for _heading, _attr in (
    ("基础分类器  ：\n", "base_estimator_"),
    ("类别标签值  ：\n", "classes_"),
    ("最终样本标签：\n", "transduction_"),
    ("分类迭代次数：\n", "n_iter_"),
):
    print(_heading, getattr(self_trn_clf, _attr), "\n")

# Translate the termination code into a human-readable reason.
stopCode = self_trn_clf.termination_condition_
print("分类终止原因：\n", getStopReason(stopCode), "\n")
 
