我的数据整理如下:
其中 multi_pre、multi_post、insitu、cN、ycN、ypT、ypN、grading、ER、PGR 都是 factor 类型。
想建立一个采用 10 折交叉验证的逻辑回归模型,用其他变量来预测 ypN(代码中实际使用的是重复 3 次的 10 折交叉验证和 glmnet 正则化逻辑回归),代码如下:
# Recipe ----
# Preprocessing specification for predicting ypN from the listed predictors in
# `dba`. Steps are applied in the order written: KNN imputation (5 neighbours),
# Box-Cox transform, zero-variance and near-zero-variance filters,
# normalisation, dummy encoding of nominal predictors, and a correlation
# filter with threshold 0.9.
# NOTE(review): Box-Cox is documented for strictly positive data -- confirm
# that the numeric predictors (e.g. LN_pre, LN_post) never contain 0 or
# negative values.
recipe <- recipe(
  ypN ~ age + diameter_pre + diameter_post + multi_pre + multi_post +
    insitu + cN + LN_pre + ycN + LN_post + grading + ER + PGR + HER2NEU + K67,
  data = dba
) %>%
  step_impute_knn(all_predictors(), neighbors = 5) %>%
  step_BoxCox(all_numeric(), -all_outcomes()) %>%
  step_zv(all_predictors(), -all_outcomes()) %>%
  step_nzv(all_predictors(), -all_outcomes()) %>%
  step_normalize(all_numeric(), -all_outcomes()) %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_corr(all_predictors(), -all_outcomes(), threshold = 0.9)

# Estimate the recipe on the full data set; used below only for inspection
# (the un-prepped `recipe` is what gets handed to train()).
prep <- prep(recipe, training = dba)

# Overview of all steps, then the details of step 3 (the zero-variance filter).
tidy(prep)
tidy(prep, number = 3)

# Processed training data stored inside the prepped recipe.
prep[["template"]]
# Combined resampling summary for trainControl(summaryFunction = ...).
#
# Concatenates, in this order, the metrics returned by caret's
# defaultSummary(), twoClassSummary() and prSummary() into one named vector.
#
# data:  data frame of resampled predictions supplied by caret.
# lev:   outcome class levels supplied by caret.
# model: model name supplied by caret.
#
# NOTE(review): twoClassSummary() is meant for two-class outcomes -- confirm
# that ypN has exactly two levels, otherwise this function will fail.
MySummary <- function(data, lev = NULL, model = NULL) {
  c(
    defaultSummary(data, lev, model),
    twoClassSummary(data, lev, model),
    prSummary(data, lev, model)
  )
}
# Resampling control: 10-fold cross-validation repeated 3 times, evaluated
# with MySummary and keeping only the final model's resamples/predictions.
# NOTE(review): with classProbs = TRUE caret needs the outcome's factor levels
# to be valid R variable names -- verify levels(dba$ypN) (e.g. not "0"/"1").
cv <- trainControl(
  # resampling scheme
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  # tuning-parameter search and model selection
  search = "grid",
  selectionFunction = "tolerance",
  # performance metrics (class probabilities are required by MySummary)
  summaryFunction = MySummary,
  classProbs = TRUE,
  # what to keep from the resampling run
  returnResamp = "final",
  savePredictions = "final",
  # execution
  verboseIter = TRUE,
  allowParallel = TRUE
)
# Tuning grid for glmnet: every combination of alpha and lambda, each running
# from 0.01 to 1 in steps of 0.01 (alpha varies fastest across rows).
hyper_grid_glm <- expand.grid(
  alpha = seq(0.01, 1, by = 0.01),
  lambda = seq(0.01, 1, by = 0.01)
)
# Fit the glmnet model: the un-prepped recipe is passed to train(), with
# resampling controlled by `cv`. Candidate (alpha, lambda) pairs come from
# `hyper_grid_glm` and are compared on Kappa.
cv_glm <- caret::train(
  recipe,
  data = dba,
  method = "glmnet",
  tuneGrid = hyper_grid_glm,
  trControl = cv,
  metric = "Kappa"
)
结果报错如下:
求解决,谢谢!