import pandas

# 0. Load the iris dataset from iris.csv.
iris_df = pandas.read_csv("iris.csv")

# 1.1 Build the training set: every column except "Species" is used as a
#     feature, and the "Species" column is the target.
feature_columns = iris_df.columns.difference(["Species"])
iris_X = iris_df[feature_columns]
iris_y = iris_df["Species"]

from sklearn_pandas import DataFrameMapper
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn2pmml.decoration import ContinuousDomain
from sklearn2pmml.pipeline import PMMLPipeline

# 1.2 Declare the PMMLPipeline object.
#     Steps, in order: map the four measurement columns through
#     ContinuousDomain and SimpleImputer, apply a 3-component PCA, keep 2
#     features with SelectKBest, then classify with LogisticRegression
#     configured as multi_class="ovr".
measurement_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
mapper = DataFrameMapper([
    (measurement_columns, [ContinuousDomain(), SimpleImputer()]),
])
pipeline = PMMLPipeline([
    ("mapper", mapper),
    ("pca", PCA(n_components=3)),
    ("selector", SelectKBest(k=2)),
    # NOTE(review): `multi_class` is reported as deprecated in recent
    # scikit-learn releases -- confirm against the installed version.
    ("classifier", LogisticRegression(multi_class="ovr")),
])

# 1.3 Fit (train) the pipeline on the feature frame and the target column.
pipeline.fit(iris_X, iris_y)

# 1.4 Verify the fitted pipeline using a sample of 15 feature rows.
verification_rows = iris_X.sample(n=15)
pipeline.verify(verification_rows)

# 1.5 Import the converter used to write the trained pipeline to a local
#     PMML file.
from sklearn2pmml import sklearn2pmml

# 1.6 Save the PMML file.
pmml_path = "E:\\MODLES\\LogisticRegressionIris.pmml"
sklearn2pmml(pipeline, pmml_path, with_repr=True)


# Other work


# 2.1 Load the exported PMML file back in as a pypmml Model.
from pypmml import Model

pmml_path = "E:\\MODLES\\LogisticRegressionIris.pmml"
model = Model.fromFile(pmml_path)

# 2.2 Use the loaded model to predict on the feature frame and print the
#     result.
Y_pred = model.predict(iris_X)
print(Y_pred)


