[学习]KEGG富集分析绘图问题
初学R语言,遇到了一些问题,请指点迷津,研究了好久,仍无头绪.
代码如下,为什么会出现无法绘制图片的情况?谢谢指点
## Gene functional enrichment analysis
## R版本为4.32,RStudio为3.18,clusterprofiler已更新至最新4.11版本
##数据集采用GSE41804,可见图片,已处理完成见图片部分
数据分别依次对应:
ID Gene logFC AveExpr t P.Value adj.P.Val B Compare

# packages
library(enrichplot)
library(org.Hs.eg.db)
library(clusterProfiler)
library(openxlsx)
library(KEGG.db) #由于kegg在线获取不稳定,已下载kegg.db,采用本地化分析节省时间
#setwd("/Users/user/Desktop/R")
# Load database
database <- org.Hs.egSYMBOL2EG
database <- as.list(database)
# Conversion of gene IDs
# Show entrez gene IDs
diff = read.xlsx("Table1.DEG.xlsx",sheet = 1) #差异基因没有出现NA情况
nrow(diff)
gene.list = diff$Gene
gene.list.enrich <- database[names(database) %in% gene.list]
# GO and KEGG enrichment
edo.kegg <- enrichKEGG(as.character(unlist(gene.list.enrich)), organism = "hsa",
pAdjustMethod = 'BH', keyType="KEGG",
pvalueCutoff = 1, qvalueCutoff = 1,use_internal_data = TRUE)
# gene enriched gene name
id2gene <- function(id.list){
tmp = bitr(strsplit(id.list,"/")[[1]],fromType = "ENTREZID",toType = "SYMBOL",OrgDb = "org.Hs.eg.db")
return(paste(tmp$SYMBOL,collapse = "/"))
}
## KEGG out
edo.kegg.out = as.data.frame(edo.kegg)[1:100,]
head(edo.kegg.out)
tmp = unlist(as.data.frame(sapply(edo.kegg.out$geneID, id2gene))[1])
colnames(tmp) = NULL
edo.kegg.out$geneName = tmp
write.xlsx(edo.kegg.out,"Table2.KEGG.xlsx")
#该数据输出查看后,发现category和subcategory存在空值,尝试用na.omit函数删除edo.kegg中产生的空值
#到tmp部分就会出现报错:Error:replacement has 99 rows, data has 100,如果不删除,
#在下面画图阶段就会出现这个错误:
#Error in `$<-.data.frame`(`*tmp*`, "Ratio", value = numeric(0)) :
#replacement has 0 rows, data has 10 不是很清楚如何处理该问题
#In addition: Warning message:画图警告这里可以忽略
#In DrawGOBubblePlot(data, "KEGG", 10, "blue") : NAs introduced by coercion
#Draw GO_KEGG BUBBLE PIC
library(Hmisc)
library(ggplot2)
library(stringr)
library(cowplot)
DrawGOBubblePlot <- function(dat, category = "KEGG", top.number = 15, col="blue"){
# Draw bubble plot using DAVID function enrichment results
category = toupper(category)
if (category == "KEGG"){
main.title = "cleanKEGG"
} else {
return(p)
}
dat1 = dat[c(1:top.number),c(2,3,4,5,9)]
dat1[,2] = str_remove(dat1[,2],"/.*")
dat1[,3] = str_remove(dat1[,3],"/.*")
dat1$Ratio = as.numeric(dat1$GeneRatio) / as.numeric(dat1$BgRatio)
dat1$Description = capitalize(dat1$Description)
dat1$Description = factor(dat1$Description,levels=dat1$Description[length(dat1$Description):1])
dat1$pvalue = -log10(dat1$pvalue)
p = ggplot(dat1,aes(Ratio,Description)) +
geom_point(aes(size=Count,colour=pvalue)) +
scale_colour_gradient(low=col,high="red") +
labs(colour=expression(-log[10]("P Value")),size="Gene counts",
x="Gene Ratio",y="",title=main.title) +
theme_bw() +
scale_x_continuous(limits = c(0,max(dat1$Ratio) * 1.2)) +
scale_y_discrete(labels = function(x) str_wrap(x, width = 50))
}
#KEGG
data = read.xlsx("Table2.KEGG.xlsx")
head(data)
p = DrawGOBubblePlot(data,"KEGG",10,"blue")
p
