这里记录下非靶向代谢组学的全分析流程,以3分组为例,即正常组、疾病组、治疗组。对于简单的2分组或者更为复杂的分组,只需要对应修改下即可
rm(list = ls()) # 清空当前环境中的所有变量
options(stringsAsFactors = F) # 关闭字符串自动转换为因子(防止类型混乱)
# 加载必要的R包
library(openxlsx) # 用于读取和写入 Excel 文件
library(ggplot2) # ggplot2绘图系统
dir.create("data/3_metabolomics") # 创建结果保存目录
# 读取正负离子模式的代谢数据表格(csv格式)
POS <- read.csv("data/3_metabolomics/raw/mix_MetabTable_Origin.csv")
NEG <- read.csv("data/3_metabolomics/raw/mix_MetabTable_Origin (1).csv")
# 合并正负离子数据
inf <- rbind(POS, NEG)
# 将正负模式分别写入一个Excel文件(多个工作表,可选)
T3 = createWorkbook()
addWorksheet(T3,'正离子模式')
writeData(T3, "正离子模式", POS) # 正模式代谢物数据
addWorksheet(T3,'负离子模式')
writeData(T3, "负离子模式", NEG) # 负模式代谢物数据
saveWorkbook(T3, "Figures+Tables/Table S3. Metabolomics_mix_MetabTable_Origin.xlsx", overwrite = TRUE)
注:每家测序公司的出的报告大致相同,只要拿到各pos组、neg组代谢物的表达丰度表即可。注意代谢物小分子需要筛选出有明确name的。
inf列名
inf数据框(部分)
#POS <- POS[!is.na(POS$Name),]
#NEG <- NEG[!is.na(NEG$Name),]
# 提取代谢物表达矩阵(正负模式的第2列和第16~33列为表达量)
dataMatrix_pos <- as.data.frame(POS[,c(2,16:33)])
dataMatrix_neg <- as.data.frame(NEG[,c(2,16:33)])
# 设置代谢物名称为行名,并移除第一列(已设置为行名)
rownames(dataMatrix_pos) <- dataMatrix_pos$Metabolite
dataMatrix_pos <- dataMatrix_pos[,-1]
rownames(dataMatrix_neg) <- dataMatrix_neg$Metabolite
dataMatrix_neg <- dataMatrix_neg[,-1]
# 合并正负模式代谢矩阵
dataMatrix = rbind(dataMatrix_pos, dataMatrix_neg)
# 设置实验分组,每组6个样本
group_list <- c(rep('Normal',6), rep('OGD_R',6), rep('HSYA75',6))
# 自定义三组颜色:蓝色、橙色、红色
biocolor = c('#5a8dda', "#ECB884", '#e81a1d')
dataMatrix:即整理成行名为代谢物,列为各样本表达
PCA 主成分分析函数
library(ropls) # 提供PCA, PLS-DA, OPLS-DA功能
PCA = function(dataMatrix, group_list, color, savepath='', filename='') {
pca <- opls(t(dataMatrix), predI = 2) # 使用ropls包做PCA,取两个主成分
scores <- pca@scoreMN # 提取样本坐标(得分)
score_df <- data.frame(
Sample = rownames(scores),
PC1 = scores[, 1],
PC2 = scores[, 2],
Group = factor(group_list, levels = unique(group_list))
)
# 绘制散点图,分组椭圆 + 主成分贡献度
ggplot(score_df, aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
stat_ellipse(aes(group = Group)) +
theme_classic(base_size = 14) +
scale_color_manual(values = color) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
labs(
title = "Score (PCA)",
x = paste0("PC 1 (", round(pca@modelDF["R2X"][1,] * 100, 1), "%)"),
y = paste0("PC 2 (", round(pca@modelDF["R2X"][2,] * 100, 1), "%)")
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
legend.title = element_blank(),
panel.grid = element_blank(),
legend.position = c(0.98, 0.98),
legend.justification = c("right", "top"))
ggsave(paste0(savepath, filename), height = 5.2, width = 5)
}
#函数调用
# 全部组比较
PCA(dataMatrix, group_list, biocolor, 'data/3_metabolomics/', 'Figure 1. PCA_All_Group.png')
# Normal vs OGD_R
PCA(dataMatrix[,1:12], group_list[1:12], biocolor[1:2], 'data/3_metabolomics/', 'Figure 1. PCA_Normal vs. OGD_R.png')
# HSYA75 vs OGD_R
PCA(dataMatrix[,7:18], group_list[7:18], biocolor[2:3], 'data/3_metabolomics/', 'Figure 1. PCA_HSYA75 vs. OGD_R.png')
PLS-DA 监督模型函数
PLS_DA = function(dataMatrix, group_list, color, savepath='', filename='') {
plsda <- opls(t(dataMatrix), group_list) # 拟合 PLS-DA 模型
scores <- plsda@scoreMN
score_df <- data.frame(
Sample = rownames(scores),
Component_1 = scores[, 1],
Component_2 = scores[, 2],
Group = factor(group_list, levels = unique(group_list))
)
# 可视化
ggplot(score_df, aes(x = Component_1, y = Component_2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
stat_ellipse(aes(group = Group)) +
theme_classic(base_size = 14) +
scale_color_manual(values = color) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
labs(
title = "Score (PLS-DA)",
x = paste0("Component 1 (", round(plsda@modelDF["R2X"][1,] * 100, 1), "%)"),
y = paste0("Component 2 (", round(plsda@modelDF["R2X"][2,] * 100, 1), "%)")
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
legend.title = element_blank(),
panel.grid = element_blank(),
legend.position = c(0.98, 0.98),
legend.justification = c("right", "top"))
ggsave(paste0(savepath, filename), height = 5.2, width = 5)
}
#调用函数
PLS_DA(dataMatrix, group_list, biocolor, 'data/3_metabolomics/', 'Figure 1. PLS-DA_All_Group.png')
PLS_DA(dataMatrix[,1:12], group_list[1:12], biocolor[1:2], 'data/3_metabolomics/', 'Figure 1. PLS-DA_Normal vs. OGD_R.png')
PLS_DA(dataMatrix[,7:18], group_list[7:18], biocolor[2:3], 'data/3_metabolomics/', 'Figure 1. PLS-DA_HSYA75 vs. OGD_R.png')
OPLS-DA 函数(含 VIP 提取)
OPLS_DA = function(dataMatrix, group_list, color, savepath='', filename='') {
oplsda <- opls(t(dataMatrix[,1:12]), group_list[1:12], predI = 1, orthoI = NA)
VIP <- oplsda@vipVn # 提取变量重要性
score_df <- data.frame(
Sample = rownames(oplsda@scoreMN),
Component_1 = oplsda@scoreMN[, 1],
Component_2 = oplsda@orthoScoreMN[, 1],
Group = factor(group_list, levels = unique(group_list))
)
# 可视化
ggplot(score_df, aes(x = Component_1, y = Component_2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
stat_ellipse(aes(group = Group)) +
theme_classic(base_size = 14) +
scale_color_manual(values = color) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
labs(
title = "Score (OPLS-DA)",
x = paste0("Component 1 (", round(oplsda@modelDF["R2X"][1,] * 100, 1), "%)"),
y = paste0("Orthogonal Component 1 (", round(oplsda@modelDF["R2Y"][2,] * 100, 1), "%)")
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
legend.title = element_blank(),
panel.grid = element_blank(),
legend.position = c(0.98, 0.98),
legend.justification = c("right", "top"))
ggsave(paste0(savepath, filename), height = 5.2, width = 5)
return(VIP)
}
#调用函数
VIP1 = OPLS_DA(dataMatrix[,1:12], group_list[1:12], biocolor[1:2], 'data/3_metabolomics/', 'Figure 1. OPLS-DA_Normal vs. OGD_R.png')
VIP2 = OPLS_DA(dataMatrix[,7:18], group_list[7:18], biocolor[2:3], 'data/3_metabolomics/', 'Figure 1. OPLS-DA_HSYA75 vs. OGD_R.png')
即每种分析各出3张图,即总体分组、AB组、BC组
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。