这里记录下非靶向代谢组学的全分析流程,以3分组为例,即正常组、疾病组、治疗组。对于简单的2分组或者更为复杂的分组,只需要对应修改下即可。
这里是第2部分,直接接着第1部分运行即可,详情见非靶向代谢组学—全分析流程1(以3分组为例)
使用limma包对疾病模型与正常组,治疗组与模型组的代谢物进行差异分析
DEG_limma(...) 差异分析函数,对代谢物表达矩阵进行 差异表达分析 + 火山图绘制 + VIP筛选
# 加载所需包
library(ggplot2)
library(rlang)
library(limma)
# 定义差异分析函数(支持 LIMMA + VIP + Volcano Plot)
DEG_limma = function(expr_matrix, group_list, group_str, output_path = '',
cutoff = 0.585, p = 0.05, t = "", h = 10, l = 0, lr = 5,
markers = "", VIP) {
# 构建设计矩阵
design <- model.matrix(~0 + factor(group_list))
colnames(design) = levels(factor(group_list))
rownames(design) = colnames(expr_matrix)
# 构建对比矩阵
contrast.matrix <- makeContrasts(contrasts = group_str, levels = design)
# 拟合线性模型
fit <- lmFit(expr_matrix, design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2) # 贝叶斯调整
# 提取差异表达结果
tempOutput = topTable(fit2, coef = 1, n = Inf)
nrDEG = na.omit(tempOutput)
# 合并 VIP 值(代谢组特有)
nrDEG <- merge(nrDEG, VIP, by = "row.names", all = TRUE)
rownames(nrDEG) = nrDEG[,1]
nrDEG = nrDEG[,-1]
colnames(nrDEG)[ncol(nrDEG)] = 'VIP'
View(nrDEG) # 可视化表格(RStudio)
# 判断上下调(根据 adj.P.Val、VIP 和 logFC)
nrDEG$Change = as.factor(ifelse((nrDEG$adj.P.Val < p) & (nrDEG$VIP > 1) & (abs(nrDEG$logFC) > cutoff),
ifelse(nrDEG$logFC > cutoff, 'Up', 'Down'), 'Not'))
deg = nrDEG
# 保存 DEG 结果为 Excel 表
write.xlsx(nrDEG, paste0(output_path, t, '_deg.xlsx'), rowNames = T)
# 标记特定 marker 代谢物
nrDEG$marker = ifelse(rownames(nrDEG) %in% markers, 1, 0)
nrDEG$gene = rownames(nrDEG)
# 绘制火山图
g2 = ggplot(data = nrDEG,
aes(x = logFC, y = -log10(adj.P.Val), color = Change, size = VIP)) +
geom_point(alpha = 0.5) +
theme_set(theme_bw(base_size = 20)) +
xlab(expression(Log[2]*FC)) + ylab(expression(-Log[10]~P-value)) +
scale_colour_manual(values = c("#5a8dda", 'grey', "#e81a1d")) +
coord_cartesian(xlim = c(-lr, lr), ylim = c(l, h)) +
geom_vline(xintercept = c(-cutoff, cutoff), linetype = "dotted", color = "black") +
geom_hline(yintercept = -log10(p), linetype = "dotted", color = "black") +
guides(color = guide_legend(override.aes = list(size = 5))) +
theme(panel.grid = element_blank())
# 标记 marker 代谢物(蓝色点 + 文字注释)
g2 <- g2 + geom_point(data = nrDEG[nrDEG$marker == 1,],
aes(logFC, -log10(adj.P.Val)), color = "blue", size = 1.5, alpha = 0.8)
print(nrDEG[nrDEG$marker == 1,]) # 输出被标注的代谢物信息
g2 <- g2 + ggrepel::geom_text_repel(data = nrDEG[nrDEG$marker == 1,],
aes(logFC, -log10(adj.P.Val), label = gene),
force = 20, color = "grey15",
max.overlaps = 50,
size = 6, point.padding = 0.5,
arrow = arrow(length = unit(0.01, "npc"),
type = "open", ends = "last"),
segment.color = "grey15",
segment.size = 0.2,
segment.alpha = 0.8)
# 保存火山图
ggsave(g2, filename = paste0(output_path, 'Volcano_Plot_deg_', t, '.png'), width = 7.4, height = 6.2)
return(deg)
}
# ========== 差异分析调用 ==========
deg1 <- DEG_limma(dataMatrix[, 1:12], group_list[1:12], "OGD_R-Normal",
output_path = 'data/3_metabolomics/',
cutoff = 0, p = 0.05, t = "OGD_R vs. Normal",
h = 7, l = 0, lr = 1, markers = "", VIP = VIP1)
deg2 <- DEG_limma(dataMatrix[, 7:18], group_list[7:18], "HSYA75-OGD_R",
output_path = 'data/3_metabolomics/',
cutoff = 0, p = 0.05, t = "HSYA75 vs. OGD_R",
h = 12, l = 0, lr = 3, markers = "", VIP = VIP2)
对于调用的函数各项参数解释
DEG矩阵:
绘制的火山图:
因为是三分组,我们重点关注的是疾病组 vs 正常组,和治疗组 vs 疾病组中,表达趋势相反的代谢物,这里进行了相关的可视化,如韦恩图、表达热图、相关性图、代谢物表达箱线图
# ========== Venn图分析 ==========
library(ggvenn)
# OGD_R下调 + HSYA75上调 的交集
list_venn <- list(
"OGD_R vs. Normal 下调" = rownames(deg1[deg1$Change == 'Down',]),
"HSYA75 vs. OGD_R 上调" = rownames(deg2[deg2$Change == 'Up',])
)
ggvenn(list_venn, fill_color = c('#5a8dda', "#e81a1d"),
set_name_size = 6, text_size = 6, show_percentage = F, stroke_size = 1, stroke_color = "grey100") +
coord_cartesian(expand = T, clip = "off") +
theme(plot.margin = unit(c(4, 0, 0, 0), "lines"))
ggsave("data/3_metabolomics/Venn of Differential Metabolites Between Down-regulated in OGD_R vs. Normal & Up-regulated in HSYA75 vs. OGD_R.png",
width = 5.8, height = 4.7, dpi = 300)
Up_compounds = intersect(rownames(deg1[deg1$Change == 'Down',]), rownames(deg2[deg2$Change == 'Up',]))
# OGD_R上调 + HSYA75下调 的交集
list_venn <- list(
"OGD_R vs. Normal 上调" = rownames(deg1[deg1$Change == 'Up',]),
"HSYA75 vs. OGD_R 下调" = rownames(deg2[deg2$Change == 'Down',])
)
ggvenn(list_venn, fill_color = c('#e81a1d', "#5a8dda"),
set_name_size = 6, text_size = 6, show_percentage = F, stroke_size = 1, stroke_color = "grey100") +
coord_cartesian(expand = T, clip = "off") +
theme(plot.margin = unit(c(4, 0, 0, 0), "lines"))
ggsave("data/3_metabolomics/Venn of Differential Metabolites Between Up-regulated in OGD_R vs. Normal & Down-regulated HSYA75 vs. OGD_R.png",
width = 5.8, height = 4.7, dpi = 300)
Down_compounds = intersect(rownames(deg1[deg1$Change == 'Up',]), rownames(deg2[deg2$Change == 'Down',]))
趋势相反的代谢物:
venn图:
# ========== 热图绘制 ==========
library(pheatmap)
# 提取需要绘图的代谢物数据
choose_matrix = dataMatrix[c(Down_compounds, Up_compounds),]
# 构建注释颜色
annotation_col = list(group = c(Normal = "#5a8dda", OGD_R = "#ECB884", HSYA75 = "#e81a1d"))
annotation = data.frame(group = group_list)
rownames(annotation) = colnames(dataMatrix)
# 数据标准化
choose_matrix = t(scale(t(choose_matrix)))
choose_matrix[choose_matrix > 2] = 2
choose_matrix[choose_matrix < -2] = -2
# 绘制热图
pheatmap(choose_matrix,
filename = "data/3_metabolomics/Pheatmap_of_Differential Metabolites.png",
border_color = "grey100",
color = colorRampPalette(c('#1B9E77', 'grey80', "#ff3c00"))(100),
annotation_colors = annotation_col[1],
annotation = annotation,
show_colnames = TRUE,
cluster_cols = TRUE,
fontsize = 9,
height = 8,
width = 15,
angle_col = 45)
热图:
# ========== 相关性分析 ==========
library(Hmisc)
library(corrplot)
# 行名截断处理,防止标签过长
rownames(choose_matrix) = lapply(rownames(choose_matrix), function(x) gsub(" ", "_", x))
rownames(choose_matrix) = sapply(rownames(choose_matrix), function(x)
if (nchar(x) > 20) paste0(substr(x, 1, 20), "...") else x)
# 计算代谢物间相关性
res <- cor(as.matrix(t(choose_matrix)))
# 绘制相关性热图
png("data/3_metabolomics/Correlation analysis of differential metabolites.png", width = 8, height = 8, units = 'in', res = 300)
corrplot(res,
method = "square",
type = "upper",
order = "hclust",
col = rev(COL2('RdBu')),
tl.cex = 0.8,
tl.col = "black")
dev.off()
# ========== 代谢物表达箱线图 ==========
# 根据 KEGG.Compound.ID 过滤掉值为 "-" 的代谢物,只保留有 KEGG 注释的
choose_matrix = dataMatrix[inf$Metabolite[inf$KEGG.Compound.ID != '-'], ]
# 进一步筛选,只保留Down_compounds的代谢物
choose_matrix = choose_matrix[rownames(choose_matrix)%in% Down_compounds,]
# 加载 tidyverse 包用于数据整理
library(tidyverse)
# 转换表达矩阵格式:先转置表达矩阵(代谢物为列、样本为行),
# 添加 Group 分组信息,再转换为长数据格式便于绘图
expr_long <- as.data.frame(t(choose_matrix)) %>%
mutate(Group = group_list) %>% # 添加分组信息
pivot_longer(
cols = -Group, # 除 Group 外的所有列转为长格式
names_to = "Gene", # 原列名为 "Gene"
values_to = "Abundance" # 表达值为 "Abundance"
)
# 加载 ggpubr 包,用于绘制箱线图并添加统计分析
library(ggpubr)
# 创建一个空的列表用于存储每个代谢物的绘图结果
plots = list()
# 定义一个函数,用于绘制单个代谢物的箱线图
plot_gene <- function(gene_name) {
df <- expr_long %>% filter(Gene == gene_name) # 取出当前代谢物的表达数据
# 设置两组之间进行 t 检验的对比组合
comparisons <- list(c("Normal", "OGD_R"), c("OGD_R", "HSYA75"))
# 对基因名做美化处理(例如将符号换行显示),避免标题过长
gene_name = str_wrap(str_replace_all(df$Gene, "([-_:])", "\\1\n"), width = 20)
# 绘制箱线图 + 抖动图点 + 显著性标记
p = ggboxplot(df, x = "Group", y = "Abundance",
color = "Group", palette = biocolor, # 使用指定颜色
add = "jitter", width = 0.6) + # 添加抖动点
stat_compare_means(comparisons = comparisons, method = "t.test", label = "p.signif") + # 显著性标记
labs(title = gene_name, y = "Relative intensity", x = '') + # 设置坐标轴标签
theme_classic(base_size = 12) + # 使用简洁主题
coord_cartesian(expand = T, clip = "off") + # 防止标签裁剪
theme(plot.title = element_text(hjust = 0.5, size = 10), # 标题居中
legend.position = "none", # 不显示图例
axis.text.x = element_text(angle = 40, hjust = 1)) # X 轴标签倾斜显示
}
# 对每一个代谢物,调用绘图函数并存入列表
for (gg in unique(expr_long$Gene)) {
plots[[gg]] <- plot_gene(gg)
}
# 加载 gridExtra 包,用于将多个图合并为一个整图
library(gridExtra)
# 将所有代谢物的箱线图合并为网格形式,ncol=4 表示每行 4 个图
plot_grid <- do.call(grid.arrange, c(plots, ncol = 4))
# 保存图像为 PNG 文件,尺寸为 8x8 英寸
ggsave('data/3_metabolomics/Differential Metabolites.png',
plot = plot_grid,
width = 8,
height = 8) # 设置较大高度以适配所有图
图中的PC(18: 0/0: 0),PC(16: 0/0: 0)均为代谢物名称
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。