今天没什么废话,主要讲讲如何用pseudobulk data
做hdWGCNA
。🙃
肯定有小伙伴问这种方法和之前讲的有什么区别呀,为什么要用这个!?🫠
之前我们用的方法都是基于metacells or metaspots
的方法。🌟
如果说你的数据量非常大,如果还用之前的方法,运行起来会非常慢,这个时候如果使用pseudobulk
的话,就会显著加速啦。⏩
当然啦,优点远不止这些,后面再慢慢介绍吧。🙊
rm(list = ls())
library(Seurat)
library(tidyverse)
library(cowplot)
library(patchwork)
library(WGCNA)
library(hdWGCNA)
记得设置一下哦!~😂
theme_set(theme_cowplot())
set.seed(123)
enableWGCNAThreads(nThreads = 8)
seurat_obj <- readRDS('Zhou_2020.rds')
seurat_obj <- SetupForWGCNA(
seurat_obj,
gene_select = "fraction",
fraction = 0.05,
wgcna_name = "pseudobulk"
)
length(GetWGCNAGenes(seurat_obj))
# Construt the pseudobulk expression profiles
datExpr <- ConstructPseudobulk(
seurat_obj,
group.by = 'cell_type',
replicate_col = 'Sample',
assay = 'RNA',
slot = 'counts', # this should always be counts!
min_reps = 10
)
# compute log2CPM normalization
# You can substitute this with another normalization of your choosing.
cpm <- t(apply(datExpr, 1, function(x){
y <- (x) / sum(x) * 1000000
log2(y + 1)
}))
seurat_obj <- SetDatExpr(
seurat_obj,
mat = cpm
)
如果你只想对其中一种细胞进行WGCNA
,那就运行下面的代码,标注一下你要的细胞就行了。
# # we only want the data from the astrocytes
# cur_group <- 'ASC'
#
# # subset the matrix for just this cell type
# cur_cpm <- cpm[grepl(cur_group, rownames(cpm)),]
#
# seurat_obj <- SetDatExpr(
# seurat_obj,
# mat = cur_cpm
# )
# select the soft power threshold
seurat_obj <- TestSoftPowers(seurat_obj)
# construct the co-expression network and identify gene modules
seurat_obj <- ConstructNetwork(
seurat_obj,
tom_name='pseudobulk',
overwrite_tom=TRUE,
mergeCutHeight=0.15
)
PlotDendrogram(seurat_obj, main='pseudobulk dendrogram')
接下来,我们在单细胞水平上计算模块特征基因(ME
)和基于特征基因的连通性(kMEs
)。
# compute the MEs and kMEs
seurat_obj <- ModuleEigengenes(seurat_obj)
seurat_obj <- ModuleConnectivity(seurat_obj)
# get MEs from seurat object
MEs <- GetMEs(seurat_obj)
mods <- colnames(MEs); mods <- mods[mods != 'grey']
# add MEs to Seurat meta-data for plotting:
meta <- seurat_obj@meta.data
seurat_obj@meta.data <- cbind(meta, MEs)
# plot with Seurat's DotPlot function
p <- DotPlot(seurat_obj, features=mods, group.by = 'cell_type')
# reset the metadata
seurat_obj@meta.data <- meta
# flip the x/y axes, rotate the axis labels, and change color scheme:
p <- p +
RotatedAxis() +
scale_color_gradient(high='red', low='grey95') +
xlab('') + ylab('')
p
# compute the co-expression network umap
seurat_obj <- RunModuleUMAP(
seurat_obj,
n_hubs = 5,
n_neighbors=10,
min_dist=0.4,
spread=3,
supervised=TRUE,
target_weight=0.3
)
# get the hub gene UMAP table from the seurat object
umap_df <- GetModuleUMAP(seurat_obj)
# plot with ggplot
p <- ggplot(umap_df, aes(x=UMAP1, y=UMAP2)) +
geom_point(
color=umap_df$color,
size=umap_df$kME*2
) +
umap_theme()
# add the module names to the plot by taking the mean coordinates
centroid_df <- umap_df %>%
dplyr::group_by(module) %>%
dplyr::summarise(UMAP1 = mean(UMAP1), UMAP2 = mean(UMAP2))
p <- p + geom_label(
data = centroid_df,
label=as.character(centroid_df$module),
fontface='bold', size=2)
p
最后祝大家早日不卷!~