在科研可视化中,热图(heatmap)是展示高维基因表达矩阵的经典利器,能够清晰地揭示样本间和基因间的表达模式。而森林图(forest plot)则凭借对效应量及其置信区间的直观展示,在分析基因与临床特征的关联中大放异彩。
但你有没有想过 —— 🌿 当热图遇上森林图,会擦出怎样的火花?
📌 今天分享一套完整 R 代码教程,手把手教你如何将热图与森林图组合起来,一张图同时展现:基因表达模式(热图)、样本的临床分组(列注释),以及每个基因的 OR 值及其 95% 置信区间(森林图)。
# 加载需要用到的R包
library(dplyr)
library(tidyverse)
library(ComplexHeatmap)
library(ggplot2)
library(grid)
# Per-gene association statistics for the genes shared with the expression
# matrix. The result is coerced to a base data.frame so that row names are
# supported and `gene_row[gene, "col"]` yields a scalar even when `sigenes`
# is a tibble.
gene_row <- sigenes %>%
  filter(SYMBOL %in% com_genes) %>%
  select(all_of(c("method", "or", "or_lci95", "or_uci95", "SYMBOL"))) %>%
  as.data.frame()

# Row names must be unique; fail with a readable message instead of the
# generic duplicate-row.names error.
if (anyDuplicated(gene_row$SYMBOL) > 0) {
  stop(
    "Duplicated SYMBOL entries in sigenes: ",
    paste(unique(gene_row$SYMBOL[duplicated(gene_row$SYMBOL)]), collapse = ", "),
    call. = FALSE
  )
}

# Direction of effect: OR > 1 is labelled "Increasing", otherwise "Decreasing".
gene_row$Risk <- ifelse(gene_row$or > 1, "Increasing", "Decreasing")
rownames(gene_row) <- gene_row$SYMBOL
gene_names <- rownames(gene_row)

# Add log2-transformed OR and confidence limits (used by the forest plot).
gene_row$logOR <- log2(gene_row$or)
gene_row$log_lower <- log2(gene_row$or_lci95)
gene_row$log_upper <- log2(gene_row$or_uci95)
## The clinical table depends on your own data: it can hold several grouping
## variables, e.g. sex, ethnicity, tumour stage, tissue of origin, and so on.
cli_col

# Extract the shared genes in the SAME order as `gene_names`, so that heatmap
# rows line up with the per-gene forest annotation and with gene_row$Risk.
# (Logical indexing with %in% would keep the row order of `dat` instead.)
missing_genes <- setdiff(gene_names, rownames(dat))
if (length(missing_genes) > 0) {
  stop(
    "Genes missing from the expression matrix: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}
expr_matrix <- dat[gene_names, , drop = FALSE]

# Row-wise z-score: scale() works on columns, hence the double transpose.
expr_scaled <- t(scale(t(expr_matrix)))
# Zero-variance rows produce NaN after scaling; show them as 0.
expr_scaled[is.na(expr_scaled)] <- 0

# Make sure the clinical rows follow the sample (column) order of the matrix.
# drop = FALSE keeps a data frame even if only one clinical column exists.
cli_col <- cli_col[colnames(expr_scaled), , drop = FALSE]
🎨定义不同分组的颜色:
# Convert each clinical grouping column to a factor with an explicit level
# order. Work happens inside local() so no helper variables leak out.
cli_col_fixed <- local({
  level_spec <- list(
    Group1 = c("Tumor", "Normal"),
    Group2 = c("High", "Low"),
    Group3 = c("Positive", "Negative"),
    Group4 = c("Yes", "No"),
    Group5 = c("GroupA", "GroupB")
  )
  fixed <- cli_col
  fixed[names(level_spec)] <- Map(
    function(values, lv) factor(values, levels = lv),
    fixed[names(level_spec)],
    level_spec
  )
  fixed
})
# Colour scheme for each grouping variable. Every colour vector must be named
# by the levels of the corresponding factor; empty names ("" = ...) are not
# valid R. The names below mirror the factor levels set on cli_col_fixed, so
# adapt both places together when using your own data.
status_colors <- list(
  Group1 = c("Tumor" = "#084b9a", "Normal" = "#bd0026"),
  Group2 = c("High" = "#9c6ba3", "Low" = "#ff9b9d"),
  Group3 = c("Positive" = "#9c6ba3", "Negative" = "#ff9b9d"),
  Group4 = c("Yes" = "#9c6ba3", "No" = "#ff9b9d"),
  Group5 = c("GroupA" = "#fbdcec", "GroupB" = "#6d3b00")
)
构建列注释——
# Column (sample) annotation showing the clinical groupings above the heatmap.
# The names in `col` must match the annotation names used here (Group, ER, ...),
# not the source column names (Group1, Group2, ...), so the colour list is
# re-keyed explicitly; otherwise the custom colours are not picked up.
col_anno <- HeatmapAnnotation(
  Group = cli_col_fixed$Group1,
  ER = cli_col_fixed$Group2,
  PR = cli_col_fixed$Group3,
  HER2 = cli_col_fixed$Group4,
  Tissue_location = cli_col_fixed$Group5,
  col = list(
    Group = status_colors[["Group1"]],
    ER = status_colors[["Group2"]],
    PR = status_colors[["Group3"]],
    HER2 = status_colors[["Group4"]],
    Tissue_location = status_colors[["Group5"]]
  ),
  annotation_name_side = "left",
  show_annotation_name = TRUE,
  annotation_legend_param = list(
    title_gp = gpar(fontsize = 8),
    labels_gp = gpar(fontsize = 6)
  )
)
# One drawing function per gene, for use as `graphics` in anno_customize().
# Each function receives (x, y, w, h) from anno_customize(); only `y` (the
# vertical position of the gene's row) is used. Horizontal positions are
# log2(OR) values mapped from the fixed axis range [-4, 4] to [0, 1] npc.
forest_graphics <- lapply(seq_along(gene_names), function(i) {
  # Capture this gene's statistics once, by position (gene_names are the row
  # names of gene_row, so index i is the same row). Positional extraction
  # returns plain scalars regardless of the data-frame flavour.
  log_or <- gene_row[["logOR"]][i]
  ci_low <- gene_row[["log_lower"]][i]
  ci_high <- gene_row[["log_upper"]][i]
  point_col <- if (isTRUE(gene_row[["or"]][i] > 1)) "red" else "blue"

  axis_range <- c(-4, 4)
  # Map a log2(OR) value to npc, clamping to the axis range so that wide
  # intervals (or -Inf from a zero bound) do not spill outside the annotation.
  to_npc <- function(val) {
    clamped <- pmin(pmax(val, axis_range[1]), axis_range[2])
    (clamped - axis_range[1]) / diff(axis_range)
  }

  function(x, y, w, h) {
    # Confidence interval.
    if (!is.na(ci_low) && !is.na(ci_high)) {
      grid.segments(
        unit(to_npc(ci_low), "npc"), y,
        unit(to_npc(ci_high), "npc"), y,
        gp = gpar(col = "black", lwd = 1.5)
      )
    }
    # Point estimate. Units are explicit because grid.points() defaults to
    # "native" units whereas grid.segments() defaults to "npc".
    if (!is.na(log_or)) {
      grid.points(
        unit(to_npc(log_or), "npc"), y,
        pch = 16, size = unit(2.5, "mm"), gp = gpar(col = point_col)
      )
    }
    # Dashed reference line at log2(OR) = 0, i.e. OR = 1.
    grid.lines(
      x = unit(to_npc(0), "npc"), y = unit(c(0, 1), "npc"),
      gp = gpar(col = "gray50", lty = 2, lwd = 1)
    )
  }
})
names(forest_graphics) <- gene_names
# Row annotation named "Forest": each gene is drawn by its own function from
# forest_graphics, in a 4 cm wide track.
forest_anno <- HeatmapAnnotation(
  Forest = anno_customize(
    x = gene_names,
    graphics = forest_graphics,
    which = "row",
    width = unit(4, "cm")
  ),
  which = "row"
)
# Assemble the main heatmap with the forest plot on the left and the clinical
# annotation on top.
ht <- Heatmap(
  matrix = as.matrix(expr_scaled),      # row-scaled expression values
  # Annotations
  top_annotation = col_anno,            # clinical information per sample
  left_annotation = forest_anno,        # per-gene log2(OR) with its CI
  # Facetting
  row_split = gene_row$Risk,            # Increasing / Decreasing genes
  column_split = cli_col_fixed$Group1,  # sample groups (e.g. Tumor / Normal)
  # Keep the supplied row and column order: no clustering
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  # Labels
  row_names_gp = gpar(fontsize = 7),    # gene names, font size 7
  show_column_names = FALSE,            # hide sample names to avoid clutter
  # Heatmap legend appearance
  heatmap_legend_param = list(
    title = "Expression",
    labels_gp = gpar(fontsize = 6)
  )
)
然后导出
# Export the figure to PDF. draw() controls legend placement and merging.
# Create the output directory first: pdf() fails if it does not exist.
dir.create(dirname("./output/heatmap.pdf"), recursive = TRUE, showWarnings = FALSE)
pdf("./output/heatmap.pdf", width = 20, height = 12)
# `finally` closes the device even if draw() errors, so no half-written PDF
# device is left open. invisible() prevents the returned object from being
# auto-printed (which would draw it again on a new device).
invisible(tryCatch(
  draw(ht,
    annotation_legend_side = "right", # annotation legends on the right
    heatmap_legend_side = "right",    # heatmap legend on the right as well
    merge_legends = TRUE              # put all legends into one column
  ),
  finally = dev.off()
))