import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Input columns used as model features (purchase total incl. tax, sales total
# incl. tax, and the two "defaulted: no / yes" indicator columns).
FEATURE_COLUMNS = ['进货总价税合计', '销售总价税合计', '是否违约_否', '是否违约_是']
# Credit-rating indicator columns (A-D).
# NOTE(review): presumably one-hot encoded (exactly one of the four is set per
# row) -- confirm against how biao.csv was produced.
TARGET_COLUMNS = ['信誉评级_A', '信誉评级_B', '信誉评级_C', '信誉评级_D']

# Load the dataset; the file is GBK-encoded.
# Original author's note: 123 records, 8 features, 1 identifier column.
data = pd.read_csv('biao.csv', encoding='gbk')

# Select several columns with a *list* of names; a bare comma-separated
# subscript is a tuple key and raises KeyError on a regular DataFrame.
X = data[FEATURE_COLUMNS]

# LogisticRegression.fit expects a 1-D label vector, so collapse the four
# indicator columns into a single label per row: the name of the column
# holding the largest value in that row.
rating_index = data[TARGET_COLUMNS].to_numpy().argmax(axis=1)
y = np.asarray(TARGET_COLUMNS)[rating_index]

# Hold out a random 25% of the rows for testing; the remaining 75% form the
# training set. random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=33,
)

# Standardize each feature to zero mean and unit variance so that features
# with large magnitudes do not dominate the fitted coefficients.
ss = StandardScaler()
# Fit the scaler on the training data only, then reuse those statistics for
# the test data; re-fitting on the test set would scale it inconsistently and
# leak test-set information.
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# Initialize the logistic-regression classifier.
lr = LogisticRegression()
# Fit the model parameters on the training data.
lr.fit(X_train, y_train)
# Predict credit-rating labels for X_test with the trained model; the result
# is stored in lr_y_predict.
lr_y_predict = lr.predict(X_test)