可视化—KEGG气泡图和桑基图绘制

原创

sheldor没耳朵

发布于 2025-06-30 14:37:56

7920

文章被收录于专栏：R语言可视化

可视化—KEGG气泡图和桑基图绘制

最近太忙了，其实有很多笔记需要整理，无奈实在抽不出时间。今天看到其他小伙伴的代码里绘制的KEGG气泡图和桑基图，觉得特别好看，就仔细扒一扒，写一篇笔记。

桑基图的绘制主要用到了“ggsankey”这个R包

参考资料：https://www.jianshu.com/p/9af18ffa37bf

首先进行常规的KEGG富集分析，拿到 kk@result 结果表（下文代码读取的是导出的tsv文件）后即可绘图。整个流程包括：

数据处理
点图（气泡图）绘制
桑基图（Sankey Diagram）绘制
拼图合并
导出为 PDF 和 PNG 格式

# 加载必要的R包
library(tidyverse)    # 数据处理
# devtools::install_github("davidsjoberg/ggsankey")  # 安装ggsankey（如果未安装）
library(ggsankey)     # 绘制桑基图
library(ggplot2)      # 绘图
library(cols4all)     # 颜色管理包
library(cowplot)      # 图形拼接

# Maximum number of pathways to show in the figure.
show_num <- 20

# Read the KEGG enrichment result table (tab-separated, with a header row).
# NOTE(review): read.table() treats both ' and " as quote characters by
# default; if the file was exported unquoted, a description containing an
# apostrophe could break parsing -- confirm how the file was written.
KK <- read.table("data/2_GEO/DEGs of Stroke-Control_KEGG.tsv", sep = "\t", header = TRUE)

# Blank out NA in character columns only. Replacing NA across the whole data
# frame would coerce any numeric column that contains NA to character, and
# the p.adjust comparison below would then silently become a string comparison.
KK <- KK %>%
  mutate(across(where(is.character), ~ replace(.x, is.na(.x), "")))

# Drop pathways in the "Human Diseases" category. The filter is skipped when
# the table has no `category` column; indexing with the resulting empty
# logical vector would otherwise discard every row.
if ("category" %in% names(KK)) {
  KK <- KK[!(KK$category %in% "Human Diseases"), ]
}

# Keep significant pathways only; which() drops rows whose p.adjust is NA
# instead of turning them into all-NA rows.
KK <- KK[which(KK$p.adjust < 0.05), ]

# Never index past the last remaining row: cap show_num at the number of
# pathways that survived filtering.
show_num <- min(show_num, nrow(KK))
if (show_num == 0) {
  stop("No significant KEGG pathways left after filtering; nothing to plot.",
       call. = FALSE)
}

# Build the gene-to-pathway mapping used by the Sankey diagram: one row per
# (gene, pathway) pair for the first show_num pathways. The gene IDs of a
# pathway are stored as a single "/"-separated string.
top_idx <- seq_len(show_num)
gene_lists <- strsplit(as.character(KK$geneID[top_idx]), "/")
df <- data.frame(
  Gene = unlist(gene_lists),
  Pathway = rep(KK$Description[top_idx], lengths(gene_lists))
)

# Convert GeneRatio from a "k/n" string (e.g. "5/80") to a decimal rounded to
# two places. vapply() guarantees a plain numeric vector even when KK has no
# rows, where sapply() would return a list.
KK$GeneRatio <- vapply(
  strsplit(as.character(KK$GeneRatio), "/"),
  function(parts) {
    parts <- as.numeric(parts)
    round(parts[1] / parts[2], 2)
  },
  numeric(1)
)

# Make sure the columns mapped to size and colour are numeric.
KK$Count <- as.numeric(KK$Count)
KK$p.adjust <- as.numeric(KK$p.adjust)

# Keep the first show_num pathways in reversed order. Capping at nrow(KK) and
# using seq_len() avoids the all-NA rows that `show_num:1` produces when fewer
# than show_num pathways are available (or when show_num is 0).
KK <- KK[rev(seq_len(min(show_num, nrow(KK)))), ]

# Y position of each pathway in the dot plot: every pathway occupies a band of
# height Count + 0.5, stacked cumulatively, and the point is drawn at the
# middle of its band.
KK <- KK %>%
  mutate(
    ymax = cumsum(Count + 0.5),
    ymin = ymax - Count - 0.5,
    label = (ymin + ymax) / 2
  )

# Bubble plot of the enrichment result: x = GeneRatio, y = the precomputed
# band midpoint (`label`), point size = Count, point colour = p.adjust.
p2 <- ggplot(KK, aes(x = GeneRatio, y = label)) +
  geom_point(aes(size = Count, color = p.adjust)) +
  scale_size_continuous(range = c(4, 8)) +
  scale_colour_distiller(palette = "YlGnBu", direction = -1) +
  # Leave 0.8 units of headroom above the topmost point.
  scale_y_continuous(limits = c(min(KK$label), max(KK$label) + 0.8)) +
  labs(x = "GeneRatio", y = "") +
  theme_bw() +
  # Hide the y-axis tick labels.
  theme(axis.text.y = element_blank())
p2

# Reshape the gene-pathway mapping into the long format expected by ggsankey,
# pad every node name with trailing spaces (cosmetic: keeps the text away
# from the node), and fix the drawing order through the factor levels:
# pathways first, then genes, each in reversed order of first appearance.
# NOTE(review): paste() turns an NA next_node into the literal string
# "NA" plus padding; presumably harmless if ggsankey detects terminal rows
# via next_x -- TODO confirm.
df0 <- df %>%
  make_long(Gene, Pathway) %>%
  mutate(across(c(node, next_node), ~ paste(.x, "       "))) %>%
  mutate(node = factor(node, levels = c(
    rev(unique(paste(df$Pathway, "       "))),
    rev(unique(paste(df$Gene, "       ")))
  )))

# One colour per distinct node, taken from a cols4all palette.
mycol <- cols4all::c4a("rainbow_wh_rd", n_distinct(df0$node))

# Sankey diagram built from the long-format gene/pathway table.
p4 <- ggplot(
  df0,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = node, label = node
  )
) +
  geom_sankey(
    flow.alpha = 0.6,
    flow.fill = "grey",     # ribbon fill colour
    flow.color = "grey80",  # ribbon outline colour
    node.fill = mycol,      # node fill colours from the palette
    smooth = 10,
    space = 0.5,
    width = 0.08
  ) +
  # Node labels, right-aligned (hjust = 1).
  geom_sankey_text(size = 3.2, color = "black", space = 0.5, hjust = 1) +
  theme_void() +
  theme(legend.position = "none")  # no legend

# Widen the right margin so the dot plot can be placed beside the diagram.
p5 <- p4 +
  theme(plot.margin = margin(t = 0, r = 5, b = 0, l = -2, unit = "cm"))
p5

# Combine the Sankey diagram and the dot plot on one cowplot canvas; the dot
# plot is overlaid on the right-hand side of the Sankey diagram.
p6 <- ggdraw() +
  draw_plot(p5) +
  draw_plot(p2, scale = 0.845, x = 0.53, y = -0.14, width = 0.5, height = 1.2) +
  theme(plot.margin = unit(c(0, 0, 1, 0), units = "cm"))
p6

# Save the combined figure as PDF. The plot is passed explicitly: without
# `plot =`, ggsave() saves the last *displayed* plot, which is not p6 when the
# script is run through source() (top-level values are not auto-printed).
ggsave(filename = "data/2_GEO/KEGG_sankey2.pdf", plot = p6, height = 8, width = 9)

# Save all current workspace variables.
save.image(file = "myWorkspace.RData")
# Reload the saved workspace if needed:
# load("myWorkspace.RData")

# 加载PDF处理和图像处理的R包
library(pdftools)  
library(magick)  

# Convert a PDF file to PNG at 300 dpi.
#
# pdf_file: path of the PDF to convert.
# png_file: output path. A single-page PDF is written to exactly this path;
#           for a multi-page PDF each page gets its own file, with
#           "_<page number>" inserted before the extension, so that later
#           pages no longer overwrite earlier ones.
#
# Returns (invisibly) the paths of the PNG files written.
PDF_TO_PNG <- function(pdf_file, png_file) {
  # pdf_convert() renders one PNG per page and returns the paths it wrote.
  page_files <- pdf_convert(pdf_file, format = "png", dpi = 300)

  if (length(page_files) <= 1) {
    out_files <- rep(png_file, length(page_files))
  } else {
    # Split png_file into stem and extension (extension may be empty).
    stem <- sub("\\.[^./\\\\]*$", "", png_file)
    ext <- substring(png_file, nchar(stem) + 1L)
    out_files <- paste0(stem, "_", seq_along(page_files), ext)
  }

  # Remove the intermediate per-page files on exit, except any that happen to
  # coincide with a requested output path.
  on.exit(unlink(setdiff(page_files, out_files)), add = TRUE)

  for (i in seq_along(page_files)) {
    image <- image_read(page_files[i])
    image_write(image, path = out_files[i], format = "png")
  }

  invisible(out_files)
}

# Convert every PDF in the output directory to a PNG next to it.
pdf_files <- list.files("data/2_GEO", pattern = "\\.pdf$", full.names = TRUE)

for (pdf_path in pdf_files) {
  # Swap only the trailing ".pdf" for ".png". Splitting the path on the first
  # "." would truncate any path containing another dot
  # (e.g. "data/2_GEO/v1.2_plot.pdf" would become "data/2_GEO/v1.png").
  png_path <- sub("\\.pdf$", ".png", pdf_path)
  PDF_TO_PNG(pdf_path, png_path)
}