大佬们,能不能帮我看一下这段代码?它是用决策树做预测的,但我看不太懂,请帮我注解一下,提前万分感谢。
这个决策树预测放在 Flask 项目的 app.py 中,用的是 xgboost 模块,预测的是汽车的价格。代码可能不完整,我不知道该截取多少,请帮我稍微看一眼吧,可怜可怜我这个决策树的菜鸟吧。
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)``.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))
# Training script: fits an XGBoost regressor on `dataset` as a run of
# top-level statements. The target is the '最低指导价' column; every column
# that remains in `dataset` after the two price columns are deleted is a feature.
all_y = dataset['最低指导价'].values
# Remove both price columns from `dataset` in place so that neither of them
# ends up in the feature matrix.
del dataset['最低指导价']
del dataset['最高指导价']
all_x = dataset.values
# Column names in feature order; passed as `feature_names` to every DMatrix,
# including the one built in the prediction route.
df_columns = dataset.columns.values
print('---> cv train to choose best_num_boost_round')
# Train on log(1 + y). Predictions therefore come out on the same log scale
# and have to be mapped back with np.expm1.
all_y = np.log1p(all_y)
dtrain = xgb.DMatrix(all_x, label=all_y, feature_names=df_columns)
# Booster hyper-parameters shared by the cross-validation run and the final fit.
# NOTE(review): recent XGBoost releases rename 'reg:linear' to
# 'reg:squarederror' and replace 'silent' with 'verbosity' — confirm against
# the installed version before changing anything.
xgb_params = {
'learning_rate': 0.01,
'max_depth': 4,
'eval_metric': 'rmse',
'objective': 'reg:linear',
'nthread': -1,
'silent': 1,
'booster': 'gbtree'
}
# Cross-validate with up to 4000 boosting rounds and early stopping after 100
# rounds; progress is printed every 100 rounds. `dict(xgb_params)` hands over
# a shallow copy so the original dict is not modified by the call.
cv_result = xgb.cv(dict(xgb_params),
dtrain,
num_boost_round=4000,
early_stopping_rounds=100,
verbose_eval=100,
show_stdv=False,
)
# The number of rows in the CV result is used as the number of boosting rounds
# for the final model (presumably one row per round kept after early stopping —
# TODO confirm against the xgb.cv documentation).
best_num_boost_rounds = len(cv_result)
# Average the RMSE over the trailing rows of the CV table. `.loc` slices by
# label and includes both end points, so labels n-11 .. n-1 are averaged.
# The variable names say "logloss", but the columns read are the RMSE means.
mean_train_logloss = cv_result.loc[best_num_boost_rounds -
11: best_num_boost_rounds - 1, 'train-rmse-mean'].mean()
mean_test_logloss = cv_result.loc[best_num_boost_rounds -
11: best_num_boost_rounds - 1, 'test-rmse-mean'].mean()
print('best_num_boost_rounds = {}'.format(best_num_boost_rounds))
print('mean_train_rmse = {:.7f} , mean_valid_rmse = {:.7f}\n'.format(
mean_train_logloss, mean_test_logloss))
print('---> training on total dataset to predict test and submit')
# Final model: same parameters, trained on the full data set for the number of
# rounds chosen above. `model` is what the prediction route calls.
model = xgb.train(dict(xgb_params),
dtrain,
num_boost_round=best_num_boost_rounds)
# Feature importance: (feature name, score) pairs sorted by score, highest first.
feature_importance = model.get_fscore()
feature_importance = sorted(
feature_importance.items(), key=lambda d: d[1], reverse=True)
print(feature_importance)
print(df_columns)
@app.route('/get_all_unique_values/<key>')
def get_all_unique_values(key):
    """Return the distinct values found under ``key`` in ``ori_dataset`` as JSON.

    Args:
        key: Name taken from the URL path and used to index ``ori_dataset``.

    Returns:
        A JSON array of the unique values, with the empty string and the
        '未知' ("unknown") marker left out. The order of the array is the
        iteration order of a set and is therefore not guaranteed.
    """
    values = set(ori_dataset[key])
    # discard() is a no-op when the element is absent, so no membership test
    # is needed before removing the two placeholder entries.
    values.discard('')
    values.discard('未知')
    return jsonify(list(values))
@app.route(
    '/predict_car_price/<pinpai>/<pingfen>/<jibie>/<cheshenjiegou>/<fadongji>/<biansux>/<xuhanglich>/<diandongji>')
def predict_car_price(pinpai, pingfen, jibie, cheshenjiegou, fadongji, biansux, xuhanglich, diandongji):
    """Predict the price of a car from the URL path segments.

    Every argument arrives as a string from the URL and is turned into one
    numeric feature. The parameter names are pinyin; the English glosses
    below are best-effort and should be confirmed against the data set.

    Args:
        pinpai: Looked up in ``brand_map`` (brand).
        pingfen: Converted with ``float`` (presumably a rating score).
        jibie: Looked up in ``jibie_map`` (presumably the vehicle class).
        cheshenjiegou: Looked up in ``jiegou_map`` (presumably body structure).
        fadongji: Passed to ``fadongji_map`` (presumably the engine).
        biansux: Looked up in ``biansuxiang_map`` (presumably the gearbox).
        xuhanglich: Passed to ``xuhang`` (presumably the driving range).
        diandongji: Passed to ``diandongji_map`` (presumably the electric motor).

    Returns:
        A JSON object ``{"predict_price": <float>}``.

    Raises:
        ValueError: If ``pingfen`` is not a valid float literal. Failures
            raised by the lookup tables or helper functions propagate as well.
    """
    # The order of this list has to line up with ``df_columns``, because those
    # names are attached to the columns of the matrix built below.
    # NOTE(review): ``fadongji_map`` and ``diandongji_map`` are *called*, while
    # the other ``*_map`` names are subscripted — confirm they really are
    # functions and not lookup tables.
    features = [
        brand_map[pinpai],
        float(pingfen),
        jibie_map[jibie],
        jiegou_map[cheshenjiegou],
        fadongji_map(fadongji),
        biansuxiang_map[biansux],
        xuhang(xuhanglich),
        diandongji_map(diandongji),
    ]
    # Build an explicit (1, n_features) dense float matrix: one row for the
    # single car being priced. A flat 1-D list leaves it to the XGBoost
    # version how the shape is interpreted; a 2-D array matches the dense
    # layout the model was trained on.
    sample = np.asarray([features], dtype=float)
    dtest = xgb.DMatrix(sample, feature_names=df_columns)
    log_price = model.predict(dtest)[0]
    # The model was trained on log1p(price); expm1 maps the prediction back
    # to the original price scale.
    predict_price = np.expm1(log_price)
    return jsonify({
        'predict_price': float(predict_price)
    })