最近我们的空转单细胞专辑会更新一篇文章复现学习,这篇文章于2023年发表在高分杂志Cancer Res上,文献标题为《Spatial Transcriptomics Depict Ligand–Receptor Cross-talk Heterogeneity at the Tumor-Stroma Interface in Long-Term Ovarian Cancer Survivors》。
数据背景和空转数据读取以及基本的降维聚类分群见:
今天的学习内容为,对空转的数据进行注释,使用MIA的方法,处理步骤如下:
即得到Fig1中的C图结果。
单细胞数据来自 Shih et al PLoS ONE 13(11): e0206785, GEO编号:https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE118828。
注释以及差异分析见帖子:IF10+空转文献复现(三):空转注释需要的参考单细胞数据处理与注释
现在先创建目录,加载空转的分析结果:空转分析见前面的帖子一、二
save_dir = '../results'
load("../results/seurat_object.RData")
############# save results to the folder ###############
folder = paste0(save_dir,'/MIA')
if (!file.exists(folder)){
dir.create(folder)
}
note:这里原代码会有一点点问题修改为avg_log2FC
###### load scRNAseq differentially expressed genes Shih 2018 PLoS One. 2018; 13(11): e0206785 ######
load_scRNA_DEG <- function(){
fnm = paste0(save_dir,'/scRNAseq/scRNAseq_differentially_expressed_genes_shih_2018.xls')
tmp1 = read.table(fnm,header = TRUE, stringsAsFactors = F, check.names=F)
scRNA = tmp1[tmp1$p_val<10^(-5),]
clusters = unique(scRNA$cluster)
n_sc = length(clusters)
scgenelist = list()
for (i in seq(1,n_sc)){
scgenelist[i] = list(scRNA[scRNA$cluster==clusters[i],"gene"])
}
return(scgenelist)
}
###### load ST differentially expressed genes ######
load_ST_DEG <- function(SOE){
tmp = SOE@meta.data$seurat_clusters
n_st = nlevels(tmp)
stgenelist = list()
for (i in seq(0,n_st-1,by = 1)){
tmp_marker <- FindMarkers(SOE, ident.1 = i, min.pct = 0.25)
tmp_marker$gene <- rownames(tmp_marker)
tmp_mk <- tmp_marker %>% select(gene, everything())
tmp_mk <- tmp_mk[order(-tmp_mk$avg_log2FC),] # 这里原代码会有一点点问题修改为avg_log2FC
tmp_mk <- tmp_mk[tmp_mk$p_val<0.01,]
tmp_mk <- tmp_mk[tmp_mk$avg_log2FC>0,] # 这里原代码会有一点点问题修改为avg_log2FC
stgenelist[i+1] = list(tmp_mk$gene)
}
return(stgenelist)
}
这里的核心主要在于:phyper(N_intersect, N_scRNA, N_tot-N_scRNA, N_ST, lower.tail = FALSE, log.p = FALSE)
使用的原理为累积超几何分布,注意参数 lower.tail 的含义。
图示理解:
###### compute MIA ######
compute_MIA <- function(scgenelist,stgenelist,SOE,SID){
n_sc = length(scgenelist)
n_st = length(stgenelist)
MIA_enrich = matrix(0,n_sc,n_st)
MIA_deplete = matrix(0,n_sc,n_st)
MIA = matrix(0,n_sc,n_st)
#N_tot = SOE@assays$RNA@counts@Dim[1] # 这里因为版本也修改了一下
N_tot = SOE@assays$RNA$counts@Dim[1]
for (i in seq(1,n_sc)){
for (j in seq(1,n_st)){
N_scRNA = length(scgenelist[[i]])
N_ST = length(stgenelist[[j]])
N_intersect = length(intersect(scgenelist[[i]],stgenelist[[j]]))
P = phyper(N_intersect, N_scRNA, N_tot-N_scRNA, N_ST, lower.tail = FALSE, log.p = FALSE)
MIA_enrich[i,j] = -log10(P)
MIA_deplete[i,j] = -log10(1-P)
if (MIA_enrich[i,j]>=MIA_deplete[i,j]){
MIA[i,j] = MIA_enrich[i,j]
} else { MIA[i,j] = -MIA_deplete[i,j]
}
}
}
MIA = data.frame(MIA)
rownames(MIA) = c("Ep_1","FB_1","T","Ep_2","Myeloid","Ep_3","FB_2","Ep_4","Endothelial","B","FB_3")
colnames(MIA) = levels(SOE@meta.data$seurat_clusters)
fnm = paste0(folder,'/',SID,'.xls')
MIA = format(MIA,digits = 3)
write.table(MIA,fnm,quote=F,sep="\t", col.names = NA)
}
有了函数,这里就特别快:
scgenelist = load_scRNA_DEG()
str(scgenelist)
11个亚群的差异基因列表。
有了预定义函数就特别快:
SOE = sample4
SID = "A4"
stgenelist = load_ST_DEG(SOE)
str(stgenelist)
compute_MIA(scgenelist,stgenelist,SOE,SID)
SOE = sample5
SID = "A5"
stgenelist = load_ST_DEG(SOE)
compute_MIA(scgenelist,stgenelist,SOE,SID)
SOE = sample10
SID = "A10"
stgenelist = load_ST_DEG(SOE)
str(stgenelist)
compute_MIA(scgenelist,stgenelist,SOE,SID)
SOE = sample12
SID = "A12"
stgenelist = load_ST_DEG(SOE)
str(stgenelist)
compute_MIA(scgenelist,stgenelist,SOE,SID)
首先将4个空转的MIA结果合并在一起:
######### display MIA for multiple samples ##########
load_MIA <- function(SID){
fnm = paste0(folder,'/',SID,'.xls')
temp1 = read.table(fnm,header = TRUE, stringsAsFactors = F, check.names=F)
cnames = colnames(temp1)
cnames1 = paste0(SID,'_c',cnames)
colnames(temp1) = cnames1
return(temp1)
}
#### merge MIA from different samples into one matrix
temp = load_MIA('A4')
for (id in c('A5','A10','A12')){
temp1 = load_MIA(id)
temp = merge(temp,temp1,by = 0)
rownames(temp) = temp[,1]
temp = temp[,-1]
}
head(temp)
绘制热图:
######### make heatmap #########
library('RColorBrewer')
my_palette <- colorRampPalette(c("red", "black", "cyan"))(n = 100)
data = data.matrix(temp)
data[data>40] = 40
# 将INF值替换为0
data[data == -Inf] <- 0
filename = paste0(folder,'/MIA_heatmap.pdf')
pdf(filename,10,6)
heatmap.2(data,
Colv="none",
dendrogram='none',
col = my_palette,
trace="none",
scale = "none",
density.info="none",
lhei=c(1,6), lwid=c(1,4),
keysize=0.3, key.par = list(cex=0.5),
margins = c(8,8),
cexRow=1,cexCol=1,srtCol=90,
key.title = 'P value (-log10)',
xlab = 'ST clusters'
)
dev.off()
到这里就分析完啦,其中的代码值得细细学习与理解~
生成的结果目录:
(待续)