我用 toad 包构建了一个评分卡模型，并想对它进行改造：把其中使用的逻辑回归模型换成了别的模型。但是我发现，不管我是否运行这个新模型（即下文的 AutoGluon 模型），toad 都可以直接输出评分卡结果。难道我的改写是错误的吗？toad 内置的是逻辑回归，是不是没办法简单地改写？
import pandas as pd
# Load the pre-split train and test sets; the first CSV column is used as the index.
train = pd.read_csv("训练集.csv", index_col=0)
test = pd.read_csv("测试集.csv", index_col=0)

# Features are the label-based column slice ficoRangeLow..n14 (both ends
# inclusive); the target is the isDefault column.
Xtr, Ytr = train.loc[:, "ficoRangeLow":"n14"], train.loc[:, "isDefault"]
Xts, Yts = test.loc[:, "ficoRangeLow":"n14"], test.loc[:, "isDefault"]

# Re-assemble features + target and tag every row with the split it came from.
data_tr = pd.concat([Xtr, Ytr], axis=1).assign(type='train')
data_ts = pd.concat([Xts, Yts], axis=1).assign(type='test')
import toad
# Data-quality overview of the training frame, computed once and kept in a
# variable (the original ran detect() twice and discarded both results).
# In a notebook, put `detect_report` on the last line of a cell to display it.
detect_report = toad.detector.detect(data_tr)
detect_columns = detect_report.columns

# IV of every column against the target, before any selection.
quality = toad.quality(data_tr, 'isDefault', iv_only=True)
quality_before_selection = quality.sort_values('iv', ascending=False)

# Feature selection on the training frame with thresholds empty=0.5, iv=0.02
# and corr=0.9.  'type' is only a train/test marker, so it is excluded from
# the screening.  `dropped` holds what select() reports as removed.
selected_data, dropped = toad.selection.select(
    data_tr,
    target='isDefault',
    empty=0.5,
    iv=0.02,
    corr=0.9,
    return_drop=True,
    exclude=['type'],
)
print(dropped)
print(selected_data.shape)

# IV of the surviving columns, sorted once and persisted.
quality = toad.quality(selected_data, 'isDefault', iv_only=True)
ivzhi = quality.sort_values('iv', ascending=False)
ivzhi.to_csv("iv值排序.csv")
# Chi-square binning fitted on the training data.  The 'type' column is a
# constant train/test marker rather than a feature, so it is kept out of the
# fit; transform() then leaves it untouched.
combiner = toad.transform.Combiner()
combiner.fit(
    selected_data.drop(columns=['type']),
    y='isDefault',
    method='chi',
    min_samples=0.05,
)  # NOTE: empty_separate=False was left disabled by the author
from toad.plot import bin_plot

# Plot the binning result of real features only.  The original indexed the
# first 14 columns positionally, which raises IndexError when fewer columns
# survive selection and can pick up the target / marker columns.
MAX_BIN_PLOTS = 14
feature_cols = [c for c in selected_data.columns if c not in ('isDefault', 'type')]
for col in feature_cols[:MAX_BIN_PLOTS]:
    bin_plot(
        combiner.transform(selected_data[[col, 'isDefault']], labels=True),
        x=col,
        target='isDefault',
    )

# Export the fitted bin rules so they can be inspected or adjusted and reloaded.
bins = combiner.export()
# Restrict the test frame to the columns that survived selection on train.
selected_test = data_ts[selected_data.columns]

# Load the exported bin rules back into the combiner, then bin the training frame.
combiner.set_rules(bins)
binned_data = combiner.transform(selected_data)

# Fit the WOE mapping on the binned training data (target and marker columns
# are excluded from the transformation) and apply it to the binned test data.
transer = toad.transform.WOETransformer()
data_tr_woe = transer.fit_transform(
    binned_data,
    binned_data['isDefault'],
    exclude=['isDefault', 'type'],
)
binned_test = combiner.transform(selected_test)
data_ts_woe = transer.transform(binned_test)

# Split both WOE frames into model inputs (X) and target (y).
non_feature_cols = ['isDefault', 'type']
Xtr_woe, Ytr_woe = data_tr_woe.drop(columns=non_feature_cols), data_tr_woe['isDefault']
Xts_woe, Yts_woe = data_ts_woe.drop(columns=non_feature_cols), data_ts_woe['isDefault']
import autogluon
from autogluon.tabular import TabularDataset,TabularPredictor
import pandas as pd
import numpy as np
# --- Replacement model: AutoGluon trained on the WOE-encoded features ---
label = 'isDefault'
metric = 'roc_auc'
time_limit = 60  # training budget passed to fit(); seconds per AutoGluon docs

# Training frame = WOE features + target; the train/test marker is dropped.
train_data = TabularDataset(data_tr_woe.drop(["type"], axis=1))
predictor = TabularPredictor(label=label, eval_metric=metric).fit(
    train_data,
    presets='best_quality',
    time_limit=time_limit,
    auto_stack=True,
)

# Test frame carries features only (no target, no marker).
test_data = TabularDataset(data_ts_woe.drop(['isDefault', 'type'], axis=1))

# One inference pass per data set (the original called predict_proba twice and
# discarded the first result).  Column 1 is taken as the positive class --
# presumably isDefault == 1; confirm with `predictor.class_labels`.
# Indexing the 2-D array with [:, 1] already yields a 1-D array.
train_proba = predictor.predict_proba(train_data).to_numpy()[:, 1]
auto_proba = predictor.predict_proba(test_data).to_numpy()[:, 1]
# PSI per WOE feature between train and test.  Only the feature frames are
# compared: the original passed frames that still contained the target and the
# train/test marker column, neither of which is a feature whose stability
# should be reported.
psi = toad.metrics.PSI(Xtr_woe, Xts_woe)

# Sort once, largest shift first.  `ascending` is passed by keyword only;
# Series.sort_values no longer accepts a positional axis argument on pandas >= 2.0.
psizhi = psi.sort_values(ascending=False)
psizhi.to_csv("psi值.csv")

# KS bucket table of the AutoGluon model's training probabilities
# (10 quantile buckets).  In a notebook, put `tr_bucket` on the last line of a
# cell to display it.
tr_bucket = toad.metrics.KS_bucket(train_proba, Ytr, bucket=10, method='quantile')
# The original referenced ScoreCard without ever importing it (NameError).
from toad.scorecard import ScoreCard

# IMPORTANT: ScoreCard trains its OWN internal logistic regression inside
# fit(); extra keyword arguments such as C / class_weight are handed to that
# regression.  It never looks at `predictor`, which is why a card is produced
# whether or not the AutoGluon model was run.  A per-bin score table needs a
# model that is additive in the WOE features, so it cannot be derived from a
# stacked ensemble.
card = ScoreCard(
    combiner=combiner,
    transer=transer,
    C=0.1,
    class_weight='balanced',
    base_score=600,
    base_odds=1,
    pdo=50,
    rate=2,
)

# Fit the scorecard on the WOE-transformed training features, as toad's
# ScoreCard.fit expects.  The original passed raw feature values, so the
# fitted coefficients did not match the WOE values used to build the card.
x_card = Xtr_woe
y_card = Ytr_woe
card.fit(x_card, y_card)

# Scores driven by the AutoGluon model: map its predicted default
# probabilities onto the same score scale (base_score / base_odds / pdo / rate)
# as the card.  This gives one total score per applicant, not a per-feature
# point table.
train_score = card.proba_to_score(train_proba)
test_score = card.proba_to_score(auto_proba)
最后生成评分卡的这一步与我上面调试的模型没有任何关系。请问该如何改写，才能让评分卡的构建基于我自己的模型？