习惯了在做生物信息学数据分析或者相关知识点整理之前,都下意识的问一下人工智能大模型,比如关于热图的绘制,大模型其实是会优先推荐ggplot2,但是对绝大部分小伙伴来说,不如pheatmap那样的入手简单!
所以,是时候介绍一下ggplot2热图扩展包(ggalign),它可以让你抛去很多ggplot细节但是又保留了它的高度定制化的优点,让我们一起来看看作者对它的介绍吧: ggplot2热图扩展包(ggalign)
可通过CRAN安装install.packages("ggalign")
,但最新版本更改了大量内容,建议安装GitHub版本。
# install.packages("remotes")
remotes::install_github("Yunuuuu/ggalign")
library(ggalign)
# 构建数据集
set.seed(123)
small_mat <- matrix(rnorm(81), nrow = 9)
rownames(small_mat) <- paste0("row", seq_len(nrow(small_mat)))
colnames(small_mat) <- paste0("column", seq_len(ncol(small_mat)))
ggheatmap(small_mat) + scale_fill_viridis_c()
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("t") +
align_dendro(aes(color = branch), k = 3) +
geom_point(aes(color = branch, y = y)) +
scale_color_brewer(palette = "Dark2")
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("t") +
align_kmeans(3L)
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("t") +
align_group(sample(letters[1:4], ncol(small_mat), replace = TRUE))
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("l") +
align_reorder(rowMeans)
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("t") +
align_dendro(aes(color = branch), k = 3) +
geom_point(aes(color = branch, y = y)) +
scale_color_brewer(palette = "Dark2") +
ggalign(aes(y = value)) +
geom_boxplot(aes(fill = .panel, group = interaction(.x, .panel))) +
scale_fill_brewer(palette = "Dark2")
ggheatmap(small_mat) +
scale_fill_viridis_c() +
hmanno("t", size = 0.5) +
align_dendro(aes(color = branch), k = 3L) +
ggalign(aes(y = value), data = rowSums) +
geom_bar(stat = "identity", aes(fill = factor(.panel))) +
scale_fill_brewer(name = NULL, palette = "Dark2") +
hmanno("l", size = 0.5) +
align_dendro(aes(color = branch), size = 0.5, k = 4L) +
scale_x_reverse() +
ggalign(aes(x = value), data = rowSums) +
geom_bar(
aes(y = .y, fill = factor(.y)),
stat = "identity",
orientation = "y"
) +
scale_fill_brewer(name = NULL, palette = "Paired", guide = "none") +
scale_x_reverse()
(ggstack(small_mat) +
ggheatmap() +
ggheatmap() &
scale_fill_viridis_c() &
theme(axis.text.x = element_text(angle = -60, hjust = 0))) +
stack_active() +
align_dendro(aes(color = branch), k = 4L, size = 0.2) +
scale_color_brewer(palette = "Dark2")
ggstack(small_mat, "v") +
align_dendro(aes(color = branch),
k = 4L, size = 0.2,
theme = theme(axis.text.x = element_blank())
) +
scale_color_brewer(palette = "Dark2") +
ggheatmap() +
ggheatmap() &
scale_fill_viridis_c() &
theme(axis.text.x = element_text(angle = -60, hjust = 0))
比如我们可以先去geo数据库里面下载 GSE104171_NormalizedMatrix.txt.gz 这个文件,然后提取里面的指定的基因的表达量矩阵:
rm(list = ls())#清空当前的工作环境
options(stringsAsFactors = F)#不以因子变量读取
options(scipen = 20)#不以科学计数法显示
library(data.table)
library(tinyarray)
data<-data.table::fread("GSE104171_NormalizedMatrix.txt.gz",
data.table = F)
data=data[!duplicated(data$V1),]
mat<-data[,c( 2:ncol(data))]
rownames(mat)=data[,1]
mat[1:4,1:4]
keep_feature <- rowSums (mat > 1) > 1 ;table(keep_feature)
ensembl_matrix <- mat[keep_feature, ]
rownames(ensembl_matrix)=rownames(mat)[keep_feature]
ensembl_matrix[1:4,1:4]
symbol_matrix=ensembl_matrix
symbol_matrix[1:4,1:4]
colnames(symbol_matrix)
library(AnnoProbe)
head(rownames(symbol_matrix))
ids=annoGene(rownames(symbol_matrix),'SYMBOL','human')
head(ids)
tail(sort(table(ids$biotypes)))
ids=ids[ids$biotypes=='protein_coding',]
symbol_matrix=symbol_matrix[rownames(symbol_matrix) %in% ids$SYMBOL,]
colnames(symbol_matrix)
boxplot(symbol_matrix[,1:4])
cor_gene<-c('CD2','CD3E','CD3D','TBX21','CD8B','PRF1',
'GZMA','GZMB','ITGAM','ARG1','CYBB','OLR1',
'FUT4','CEACAM8','S100A8','S100A9')
exprSet=symbol_matrix[cor_gene,]
可以看到,是16个基因在74个样品的表达量矩阵:
我们首先呢,使用pheatmap绘图 :
p1=pheatmap::pheatmap(cor(t(exprSet)))
matrix_chosen<-t(scale(t(exprSet)))
tmp<-colnames(matrix_chosen)
tmp<-rep('',ncol(matrix_chosen))
p2=pheatmap::pheatmap(matrix_chosen,labels_col=tmp,
legend_breaks=seq(-3,3,1))
hc=cutree(p2$tree_col,2)
# MN-MDSC signature genes (ITGAM, ARG1, CYBB, OLR1, FUT4, CEACAM8, S100A8, and S100A9)
# cytotoxic lymphocyte signature genes (CD2, CD3E, CD3D, TBX21, CD8B, PRF1, GZMA, and GZMB)
ac=as.data.frame(as.character(hc))
rownames(ac)=colnames(matrix_chosen)
p3=pheatmap::pheatmap(matrix_chosen,labels_col=tmp,
annotation_col = ac,
legend_breaks=seq(-3,3,1))
library(cowplot)
require(ggplotify)
cowplot::plot_grid(as.ggplot(p1) , as.ggplot(p3), ncol=2)
可以看到,比较麻烦的把样品分成了两组:
如果是ggalign就一句话 :
library(ggalign)
ggheatmap(matrix_chosen) +
scale_fill_viridis_c() +
hmanno("t") +
align_dendro(aes(color = branch), k = 2) +
geom_point(aes(color = branch, y = y)) +
scale_color_brewer(palette = "Dark2")
同样的可以把样品分成两组: