圈圈456 2022-06-15 17:02 采纳率: 90.9%
浏览 243
已结题

R语言中LightGBM实践问题

问题遇到的现象和发生背景

R语言LightGBM包进行多分类,使用的是tif数据,但是得到的结果非常差,原先使用python实现的时候得到的准确度很正常,R中也不知道是什么原因导致的这种现象。我是先随机选取60个区块的数据当做初始数据集,然后进行训练预测,将准确度最低的5个区数据重新加入到训练数据集中,然后再次训练,直至迭代完成。我原先在python中使用相同数据进行相同学习器进行训练,得到的结果并不相同。

问题相关代码,请勿粘贴截图

但是训练出的模型不管是用于训练集还是测试集,得到的预测结果都非常低,不管尝试多少次都只有20%一下:

img

library(raster)
library(sampling)
library(stringr)
library(lightgbm)
library(dplyr)
library(base)
library(tiff)
library(paradox)
library(mlr3extralearners)
library(mlr3verse)
library(mlr3tuning)


# 读取选取tif

readtif_vector <- function(x){
  # 将读取到的tif转化为向量
  label <- raster(x)
  label = as.matrix(label)
  label <- as.vector(t(label))
  return(label)
}

readfeature <- function(pathDir,sample){
  path = pathDir
  i = j = 1
  tif = matrix(ncol = 16, nrow = 1)
  while(i <= length(sample[[1]])){
    data = matrix(ncol = 1,nrow = 1476)
    pathsampel = paste(path, sample[[1]][i], sep = '\\')
    while(j <= 15){
      path_i = dir(pathsampel)
      k = readtif_vector(paste(pathsampel, path_i[j], sep = '\\'))
      data = cbind(data,k)
      j = j + 1
    }
    j = 1
    i = i + 1
    tif = rbind(tif,data)
  }
  tif <- tif[-1,-1]
  colnames(tif) <- c(1:15)
  return(tif)
}

readlabel <- function(pathDir, sample){
  path = pathDir
  tif = matrix(ncol = 2, nrow = 1)
  i = j = 1
  while(i <= length(sample[[1]])) {
    data = matrix(ncol = 1,nrow = 1476)
    while(j <= length(dir(path))){
      if(paste('dilei',sample[[1]][i],'.tif',sep='')==dir(path)[j]){
        k = readtif_vector(paste(path,dir(path)[j],sep = '\\'))
        data = cbind(data,k)
      }
      j = j + 1
    }
    j = 1
    i = i + 1
    tif = rbind(tif, data)
  }
  tif <- tif[-1,-1]
  return(tif)
}

readtestfeature <- function(pathDir){
  getfeaturetestname = list.files(pathDir, pattern = "*.tif$", full.names = TRUE)
  tif = matrix(nrow=165312,ncol=1)
  i = 1
  while(i <= length(getfeaturetestname)){
    k = readtif_vector(getfeaturetestname[i])
    tif = cbind(tif, k)
    i = i + 1
  }
  tif = tif[,-1]
  colnames(tif) <- c(1:15)
  return(tif)
}

# 保存图像tif
orginal_param <- raster('D:\\Personality\\paper\\GBDT\\label\\dilei.tif')
save_tif <- function(preds, iteration){
  t <- matrix(preds$response,nrow = 252,ncol=656,byrow = TRUE) %>% as.numeric()
  t2<-raster(ncol = 252, nrow = 656, 
             resolution = c(0.0007515805, 0.0007515805),
             ext = extent(orginal_param), vals = t)
  output = paste('D:\\Personality\\paper\\GBDT\\R\\',iteration,'.tif')
  writeRaster(t2,output,overwrite = TRUE)
}


# 概率选区
probability_block <- function(lgb.pred){
  n = 41 * 36
  i = 1
  result = c()
  test <- as.matrix(y_test,ncol=1)
  while(i <= 165312){
    k = i+n-1
    a <- test[c(i:k),1] == lgb.pred[i:k]
    cnt1 <- sum(a[a=TRUE])
    acc <- cnt1/165312
    result = c(result,acc)
    i = i+n
  }
  result = result[-sample_block[[3]]]
  num = order(result,decreasing=T)[1:5]
  list = list()
  list[[3]] = c(sample_block[[3]],num)
  list[[1]] = orginal_block_name[list[[3]]]
  list[[2]] = orginal_block_name[-list[[3]]]
  return(list)
}

# 初始数据集创建

orginal_block_name = c()
for(i in 0:6){
  for(j in 0:15){
    k = paste(i,j,sep='_')
    orginal_block_name = c(orginal_block_name,k)
  }
}

sampleblock <- function(x){
  
  #随机选区
  
  path = orginal_block_name # 获取当前目录文件名
  s<-srswor(x,112)  # 随机获取x个区块
  ind<-(1:112)[s!=0]
  sample<-path[ind]
  path <- path[-ind]  # 删除x个随机区块
  return(list(sample, path, ind))
} # 随机选区

sample_block = sampleblock(90) # 选取70个

# 测试数据
y_test = readtif_vector('D:\\Personality\\paper\\GBDT\\label\\dilei.tif')
x_test = readtestfeature('D:\\Personality\\paper\\GBDT\\train')

# 迭代

iteration = 1
itera_acc <- list()
while (iteration <= 6) {
  
  # 训练数据集
  
  y_train = readlabel('D:\\Personality\\paper\\GBDT\\testshiyan\\label', sample = sample_block)
  x_train = readfeature('D:\\Personality\\paper\\GBDT\\testshiyan\\train', sample = sample_block)
  
  # 随机搜索,使用mlr3包进行调优
  dtrain <- cbind(x_train, y_train) %>% data.frame()
  dtrain$y_train <- factor(dtrain$y_train)
  dtest <- cbind(x_test, y_test) %>% data.frame()
  dtest$y_test <- factor(dtest$y_test)
  
  dtrain<-as_task_classif(dtrain,target = "y_train",id="a")
  dtest<-as_task_classif(dtest,target = 'y_test',id="b")
  
  learner <- lrn("classif.lightgbm")  # 创建学习器
  search_space <- ps(learning_rate = p_dbl(lower = 0.001, upper = 0.5),
                     num_iterations = p_int(lower = 200, upper = 600),
                     num_leaves = p_int(lower = 10, upper = 60),
                     max_depth = p_int(lower = -1, upper = 10)
  )  # 设置搜索空间
  resampling <- rsmp("holdout") # 选择重抽样方法
  terminator <- trm("evals", n_evals = 10) # 设定何时停止训练
  measure <- msr("classif.acc") # 选择评价指标
  tuner <- tnr("random_search") # 选择搜索方法
  
  # 搜索
  at <- AutoTuner$new(
    learner = learner,
    resampling = resampling,
    search_space = search_space,
    measure = measure,
    tuner = tuner,
    terminator = terminator
  )
 
  # 训练模型
  at$train(dtrain)
  # 预测
  preds <- at$predict(dtest)
  print('-----------------------\n准确度为:')
  
  preds$score(measures = msr("classif.acc"))
  
  print('-----------------------------------')
  
  # 概率选区
  
  sample_block <- probability_block(preds$response)
  
  # tif保存
  
  save_tif(preds,iteration)
  
  iteration = iteration + 1
  
}


运行结果及报错内容
我的解答思路和尝试过的方法

我尝试使用lightgbm自身的lgb.train进行预测,也尝试使用mlr3的调参后的模型进行预测,但是得到的预测准确度都很低,甚至循环多次后,准确度还降低了。

我想要达到的结果

我想解决这一问题,并能进行正常的模型训练以及预测。

  • 写回答

1条回答 默认 最新

  • 有问必答小助手 2022-06-20 09:51
    关注

    你好,我是有问必答小助手,非常抱歉,本次您提出的有问必答问题,技术专家团超时未为您做出解答


    本次提问扣除的有问必答次数,已经为您补发到账户,我们后续会持续优化,扩大我们的服务范围,为您带来更好地服务。

    评论

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 6月21日
  • 创建了问题 6月15日

悬赏问题

  • ¥15 执行 virtuoso 命令后,界面没有,cadence 启动不起来
  • ¥50 comfyui下连接animatediff节点生成视频质量非常差的原因
  • ¥20 有关区间dp的问题求解
  • ¥15 多电路系统共用电源的串扰问题
  • ¥15 slam rangenet++配置
  • ¥15 有没有研究水声通信方面的帮我改俩matlab代码
  • ¥15 ubuntu子系统密码忘记
  • ¥15 保护模式-系统加载-段寄存器
  • ¥15 电脑桌面设定一个区域禁止鼠标操作
  • ¥15 求NPF226060磁芯的详细资料