
这里记录下非靶向代谢组学的全分析流程,以3分组为例,即正常组、疾病组、治疗组。对于简单的2分组或者更为复杂的分组,只需要对应修改下即可。
这里是第3部分,直接接着第1、2部分运行即可,详情见
我想使用“clusterprofiler”包进行代谢组学的kegg分析,但是“clusterprofiler”包是面向基因组学的,要是想使用其做代谢组学,就需要自己构建背景基因集,该代码只需要执行一次,之后做其他的代谢组学只需要直接用即可。
以小鼠物种为例,人类物种遇到了再处理吧,累了累了
这段代码可以用于构建本地 KEGG 代谢通路信息数据库(小鼠 物种),适合用于本地化富集分析或绘图使用。你所用的 massdatabase 包配合 metpath 或metID 系列分析流程是目前 代谢组富集分析推荐做法之一
library(massdatabase)
download_kegg_pathway(path = "kegg_rat_pathway",
sleep = 1,
organism = "mmu")
mmu_data <- read_kegg_pathway(path = "kegg_rat_pathway")
class(rat_data)
## 转变数据库形式
kegg_pathway_database <-convert_kegg2metpath(data=mmu_data, path = ".")
##查看有多少通路
length(unlist(kegg_pathway_database@pathway_name))
#提取通路与gene对应信息
library(tidyr)
path_num <- c(1:11) ## 排除没有对应gene的几个通路
## 提取1到67 77 到104的通路
result1 <- NULL
for (i in 1:355) {
a <- mmu_data[[i]]$pathway_id # 提取id
b <- mmu_data[[i]]$pathway_name # 提取名称
e <- mmu_data[[i]]$gene_list # 提取gene对应关系
c <- mmu_data[[i]]$describtion
if ( i %in% path_num){
d <- 'None;None'
}else {
d <- mmu_data[[i]]$pathway_class # 提取前两层级注释
Pathway_re <- e%>% mutate(pathway_id = a,
pathway_name = b,
pathway_class= d) %>%
separate(pathway_class,into =c("Pathway1","Pathway2"),sep = ';',remove = FALSE) ##前两层分开
result1 <- rbind(result1,Pathway_re)}
}
write.table(result1, 'mmu_KEGG_path.tsv', sep = '\t',row.names = FALSE,quote = FALSE)
unique(result2$pathway_id)
#提取通路与化合物对应信息
result2 <- NULL
com_num <- c(1,2,3,4,5,6,7,8,9,10,11,57,59,63,86,87,89,91,92,93,94,95,96,97,102,103,104,105) ## 排除没有对应化合物的几个通路
for (i in 1:105) {
a <- mmu_data[[i]]$pathway_id # 提取id
b <- mmu_data[[i]]$pathway_name # 提取名称
f <- mmu_data[[i]]$compound_list # 提取化合物对应关系
# c <- tgo_data[[i]]$describtion
if ( i %in% com_num){
d <- 'None;None'
}else {
d <- mmu_data[[i]]$pathway_class
Compound_re <- f %>% mutate(pathway_id = a,
pathway_name = b,
pathway_class= d)%>%
separate(pathway_class,into =c("Pathway1","Pathway2"),sep = ';',remove = FALSE)
result2 <- rbind(result2,Compound_re)
}
}
write.table(result2, 'mmu_KEGG_com.tsv', sep = '\t',row.names = FALSE,quote = FALSE)mmu_KEGG_com.tsv

mmu_KEGG_path.tsv

本地进行代谢物KEGG富集分析
K_C = inf$`KEGG Compound ID`[inf$Metabolite%in%rownames(deg1[deg1$Change!='Not',])]
K_C = K_C[K_C != '-']
K_C = unlist(lapply(K_C,function(x) unlist(strsplit(x,split=';'))))
OGD_RvsNormal_K_C <- K_C
#kegg1 <- KEGG_com(K_C,savedir="data/2_transcriptome/",name="OGD_R vs. Normal",p=1,shownum=15,w=7.5,h=8)
K_C = inf$`KEGG Compound ID`[inf$Metabolite%in%rownames(deg2[deg2$Change!='Not',])]
K_C = K_C[K_C != '-']
K_C = unlist(lapply(K_C,function(x) unlist(strsplit(x,split=';'))))
HSYA75vsOGD_R_K_C <- K_C
#富集分析
library(clusterProfiler)
kegg_anno <- data.table::fread("~/Documents/database/Metabolomics/mmu/mmu_KEGG_com.tsv",
stringsAsFactors = FALSE,
data.table = F)
colnames(kegg_anno)[colnames(kegg_anno) == "KEGG.ID"] <- "compound_id"
# 提取 compound 与 pathway 的对应关系
kegg_term2gene <- kegg_anno %>%
select(pathway_id, compound_id) %>%
distinct()
# 提取 term 的注释(用于 enrichResult 中的描述字段)
kegg_term2name <- kegg_anno %>%
select(pathway_id, pathway_name) %>%
distinct()
kegg1 <- enricher(gene = OGD_RvsNormal_K_C, # 你感兴趣的代谢物(KEGG Compound ID)
TERM2GENE = kegg_term2gene, # 通路与代谢物的映射表
TERM2NAME = kegg_term2name, # 通路的名称表
pvalueCutoff =1, # 可调整为 0.1 等更宽松阈值
pAdjustMethod = "BH")
kegg2 <- enricher(gene = HSYA75vsOGD_R_K_C, # 你感兴趣的代谢物(KEGG Compound ID)
TERM2GENE = kegg_term2gene, # 通路与代谢物的映射表
TERM2NAME = kegg_term2name, # 通路的名称表
pvalueCutoff =1, # 可调整为 0.1 等更宽松阈值
pAdjustMethod = "BH")OGD_RvsNormal_K_C

kegg1$result(示例,非本项目数据)

#绘图
KEGG_intersect = intersect(kegg1@result$ID,kegg2@result$ID)
#二者共有通路
list_venn <- list(
"KEGG Pathways in\nOGD_R vs. Normal" = kegg1@result$ID,
"KEGG Pathways in\nHSYA75 vs. OGD_R" = kegg2@result$ID
)
ggvenn(list_venn,c("KEGG Pathways in\nOGD_R vs. Normal","KEGG Pathways in\nHSYA75 vs. OGD_R"),fill_color=c('#ff3c00',"#1B9E77"),
set_name_size=6,text_size=6,show_percentage = F, stroke_size = 1,stroke_color = "grey100")+
coord_cartesian(expand=T,clip="off")+
theme(plot.margin = unit(c(4, 0, 0, 0), "lines"))
ggsave(file = "data/3_metabolomics/Venn of KEGG Pathways Between OGD_R vs. Normal & HSYA75 vs. OGD_R.png",width = 5.8,height=4.7,dpi=300)
df_kegg = rbind(kegg1@result[kegg1@result$ID%in%KEGG_intersect,],kegg2@result[kegg2@result$ID%in%KEGG_intersect,])
df_kegg$Group = rep(c("OGD_R vs. Normal", "HSYA75 vs. OGD_R"), each = 20)
ggplot(df_kegg, aes(x = Description, y = Count, fill = Group)) +
geom_bar(stat = "identity", position = "dodge",alpha=0.8,width=0.8) +
labs(x = "KEGG Pathways", y = "Number of Metabolites") +
scale_fill_manual(values = c('#1B9E77',"#ff3c00"))+
labs(fill = "Group") +
theme_bw()+
theme(axis.text.x = element_text(angle = 55, hjust = 1),plot.margin = unit(c(0.5, 0.5, 0.5, 2), "cm"))
ggsave(filename = "data/3_metabolomics/KEGG_Num_of_Metabolite.png",width=8,height=5)
T5 = createWorkbook()
addWorksheet(T5,'Intersect Pathway')
writeData(T5, "Intersect Pathway", df_kegg)
addWorksheet(T5,'KEGG for OGD_R vs. Normal')
writeData(T5, "KEGG for OGD_R vs. Normal", kegg1@result)
addWorksheet(T5, "KEGG for HSYA75 vs. OGD_R")
writeData(T5, "KEGG for HSYA75 vs. OGD_R", kegg2@result)
saveWorkbook(T5, "Figures+Tables/Table S5. Metabolite Enrichment Analysis Results.xlsx", overwrite = TRUE)
#附加批量PDF转PNG
library(pdftools)
library(magick)
PDF_TO_PNG <- function(pdf_file,png_file){
pdf_images <- pdf_convert(pdf_file, format = "png", dpi = 300)
for (i in seq_along(pdf_images)) {
image <- image_read(pdf_images[i])
image_write(image, path = png_file, format = "png")
}
}
pdf_files <- list.files("data/3_metabolomics/", pattern = "\\.pdf$", full.names = TRUE)
for (pdf_path in pdf_files) {
png_path <- file.path(paste0(strsplit(pdf_path,split = '\\.')[[1]][1], ".png"))
# 调用你的函数
PDF_TO_PNG(pdf_path, png_path)
}
补充单独kegg气泡图代码
kk_dt <- kegg1@result
#kk_dt <- kegg2@result
kk_dt_top10 <- kk_dt[order(kk_dt$p.adjust), ][1:min(10, nrow(kk_dt)), ]
#kk_dt_top10 <- kk_dt
kk_dt_top10$GeneRatio
kk_dt_top10$Description
kk_dt$Description
#绘图
dt <- kk_dt_top10
library(ggplot2)
library(stringr)
mytheme = theme(axis.text.x = element_text(hjust = 0.5,size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
#axis.line = element_line(size = 1),#坐标轴粗细
plot.margin = unit(c(1,1,1,1), "cm"),#画布边缘距离上(top)、右(right)、下(bottom)、左(left)
plot.title = element_text(hjust = 0.5,size = 22),
legend.title = element_text(size = 22),
legend.text = element_text(size = 22),
legend.position = "right",
legend.background = element_rect(fill = 'transparent'))
#GeneRatio改为小数
dt$GeneRatio <- as.numeric(str_extract(dt$GeneRatio, "^[0-9]+")) /
as.numeric(str_extract(dt$GeneRatio, "(?<=/)[0-9]+"))
dt$richFactor <- dt$Count / as.numeric(sub("/\\d+", "", dt$BgRatio))
dt2 <- dt[order(dt$GeneRatio),]
dt2$Description <- factor(dt2$Description,levels = dt2$Description)
p_kegg <- ggplot(data = dt2, aes(x = Description, y = GeneRatio, fill = -log10(pvalue))) +
geom_point(aes(size = Count), shape = 21, colour = "black") + # 绘制散点
labs(
x = NULL,
y = "GeneRatio",
title = "KEGG for OGD_R vs. Normal",
#title = "KEGG for TMP vs. OGD_R",
fill = bquote("-"~Log[10]~"(Pvalue)"),
size = "Metabolite Count"
) +
scale_fill_gradientn(colours = c("#f7ca64", "#46bac2","#7e62a3")) + # 更改配色
scale_size_continuous(range = c(4, 8)) +#散点大小范围
coord_flip() +
theme_bw() +
mytheme +
theme(
plot.title = element_text(hjust = 0.5, size = 18, color = "black", face = "bold"), # 标题颜色和样式
axis.title.y = element_text(size = 20, color = "darkred"), # y轴标题颜色
axis.title.x = element_text(size = 10, color = "darkgreen"), # x轴标题颜色
axis.text.x = element_text(size = 12, color = "black"), # x轴标签颜色
axis.text.y = element_text(size = 12, color = "black"), # y轴标签颜色
legend.title = element_text(size = 10, color = "purple"), # 图例标题颜色
legend.text = element_text(size = 10, color = "brown") # 图例标签颜色
) +
scale_x_discrete(labels = function(dat) str_wrap(dat, width = 50)) # 通路名自动换行
p_kegg
ggsave('table+figure/KEGG for OGD_R vs. Normal.pdf',plot = p_kegg,height = 6,width = 7) 
之前的火山图比较丑,这里补充下美化后的版本,数据来源就是之前的deg1或deg2
#绘制
library(ggplot2)
library(ggrepel)
library(dplyr)
# logFC 和 P.Value 的阈值
logFC_t <- 0
p_t <- 0.1
# deg <- deg1
deg <- deg2
head(deg)
# Step 1: 提取 top5 上调 & 下调代谢物
top_up <- deg %>%
filter(Change == "Up") %>%
arrange(desc(logFC)) %>%
head(5)
top_down <- deg %>%
filter(Change == "Down") %>%
arrange(logFC) %>%
head(5)
top_met <- rbind(top_up, top_down)
# Step 2: 绘制火山图
this_title <- "TMP vs. OGD_R"
p1 <- ggplot(data = deg, aes(x = logFC, y = -log10(P.Value))) +
geom_point(aes(color = Change, size = VIP), alpha = 0.7) +
scale_color_manual(values = c("Down" = "blue", "Not" = "grey", "Up" = "red")) +
scale_size_continuous(range = c(1, 4)) +
geom_vline(xintercept = c(-logFC_t, logFC_t), linetype = "dashed", color = "black", linewidth = 0.4) +
geom_hline(yintercept = -log10(p_t), linetype = "dashed", color = "black", linewidth = 0.4) +
theme_classic() +
labs(x = "log2(FC)", y = "-log10(P value)", size = "VIP", color = "Change") +
geom_text_repel(data = top_met,
aes(label = rownames(top_met)),
size = 3,
box.padding = 0.3,
point.padding = 0.2,
max.overlaps = 100) +
ggtitle(this_title)+
theme(plot.title = element_text(size=12,hjust = 0.5))
# 展示图
print(p1)
#ggsave(plot = p1, filename = "table+figure/deg_vol_OvsN_pos.pdf", height = 5, width = 6)
ggsave(plot = p1, filename = "table+figure/deg_vol_TvsO_pos.pdf", height = 5, width = 6)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。