scRNA | 和顶刊学分析，OR值展示不同分组的细胞类型差异

生信补给站

发布于 2024-06-06 15:48:00

2040

发布于 2024-06-06 15:48:00

在对单细胞数据进行注释后，通常会使用柱形图比较不同分组之间的cluster/celltype差异 scRNA分析|单细胞文献Fig1中的分组umap图和细胞比例柱形图，本文介绍张老师2021年发表于SCIENCE的Pan-cancer single-cell landscape of tumor-infiltrating T cells 文献中OR比值的方法（OR>1.5标示倾向在该分组中分布，OR<0.5标示不倾向在该分组中分布，详见文献methods），来比较不同分组（正常组织，肿瘤组织，PBMC，用药前后等）间cluster/celltype之间的分布差异。该方法在越来越多的文献中出现。

一载入R包，数据

1 ，载入必要的R包

#remotes::install_github("Japrin/sscVis")
library("sscVis")
library("data.table")
library("grid")
library("cowplot")
library("ggrepel")
library("readr")
library("plyr")
library("ggpubr")
library("tidyverse")
library(viridis)
library(Seurat)
library(pheatmap)

2，载入函数

这里使用24年NG文献Multi-omic profiling of clear cell renal cell carcinoma identifies metabolic reprogramming associated with disease progression中提供的OR分析的2个主函数

do.tissueDist <- function(cellInfo.tb = cellInfo.tb,
                          meta.cluster = cellInfo.tb$meta.cluster,
                          colname.patient = "patient",
                          loc = cellInfo.tb$loc,
                          out.prefix,
                          pdf.width=3,
                          pdf.height=5,
                          verbose=0){
  ##input data 
  library(data.table)
  dir.create(dirname(out.prefix),F,T)

  cellInfo.tb = data.table(cellInfo.tb)
  cellInfo.tb$meta.cluster = as.character(meta.cluster)

  if(is.factor(loc)){
    cellInfo.tb$loc = loc
  }else{cellInfo.tb$loc = as.factor(loc)}

  loc.avai.vec <- levels(cellInfo.tb[["loc"]])
  count.dist <- unclass(cellInfo.tb[,table(meta.cluster,loc)])[,loc.avai.vec]
  freq.dist <- sweep(count.dist,1,rowSums(count.dist),"/")
  freq.dist.bin <- floor(freq.dist * 100 / 10)
  print(freq.dist.bin)

  {
    count.dist.melt.ext.tb <- test.dist.table(count.dist)
    p.dist.tb <- dcast(count.dist.melt.ext.tb,rid~cid,value.var="p.value")
    OR.dist.tb <- dcast(count.dist.melt.ext.tb,rid~cid,value.var="OR")
    OR.dist.mtx <- as.matrix(OR.dist.tb[,-1])
    rownames(OR.dist.mtx) <- OR.dist.tb[[1]]
  }

  sscVis::plotMatrix.simple(OR.dist.mtx,
                            out.prefix=sprintf("%s.OR.dist",out.prefix),
                            show.number=F,
                            waterfall.row=T,par.warterfall = list(score.alpha = 2,do.norm=T),
                            exp.name=expression(italic(OR)),
                            z.hi=4,
                            palatte=viridis::viridis(7),
                            pdf.width = 4, pdf.height = pdf.height)
  if(verbose==1){
    return(list("count.dist.melt.ext.tb"=count.dist.melt.ext.tb,
                "p.dist.tb"=p.dist.tb,
                "OR.dist.tb"=OR.dist.tb,
                "OR.dist.mtx"=OR.dist.mtx))
  }else{
    return(OR.dist.mtx)
  }
}

test.dist.table <- function(count.dist,min.rowSum=0)
{
  count.dist <- count.dist[rowSums(count.dist)>=min.rowSum,,drop=F]
  sum.col <- colSums(count.dist)
  sum.row <- rowSums(count.dist)
  count.dist.tb <- as.data.frame(count.dist)
  setDT(count.dist.tb,keep.rownames=T)
  count.dist.melt.tb <- melt(count.dist.tb,id.vars="rn")
  colnames(count.dist.melt.tb) <- c("rid","cid","count")
  count.dist.melt.ext.tb <- as.data.table(ldply(seq_len(nrow(count.dist.melt.tb)), function(i){
    this.row <- count.dist.melt.tb$rid[i]
    this.col <- count.dist.melt.tb$cid[i]
    this.c <- count.dist.melt.tb$count[i]
    other.col.c <- sum.col[this.col]-this.c
    this.m <- matrix(c(this.c,
                       sum.row[this.row]-this.c,
                       other.col.c,
                       sum(sum.col)-sum.row[this.row]-other.col.c),
                     ncol=2)
    res.test <- fisher.test(this.m)
    data.frame(rid=this.row,
               cid=this.col,
               p.value=res.test$p.value,
               OR=res.test$estimate)
  }))
  count.dist.melt.ext.tb <- merge(count.dist.melt.tb,count.dist.melt.ext.tb,
                                  by=c("rid","cid"))
  count.dist.melt.ext.tb[,adj.p.value:=p.adjust(p.value,"BH")]
  return(count.dist.melt.ext.tb)
}

该分析只需要 分组信息 和 cluster/celltype结果，也就是meta.data 中的两列信息。

二 OR分析

1，载入单细胞数据

仍然使用之前的sce2数据，为减少计算量提取Myeloid亚群做示例，注意该分析需要不同分组 的 cluster/celltype细胞数均不为 0。

load("sce.anno.RData")
sce.Mye <- subset(sce2,celltype %in% c("Myeloid" ) )
sce.Mye <- NormalizeData(sce.Mye)
sce.Mye <- FindVariableFeatures(sce.Mye, selection.method = "vst", nfeatures = 2000)
sce.Mye <- ScaleData(sce.Mye)
sce.Mye <- RunPCA(sce.Mye, npcs = 20)
#标准流程，参数不变
sce.Mye <- sce.Mye %>% 
  RunUMAP(dims = 1:20) %>% 
  FindNeighbors(dims = 1:20) %>% 
  FindClusters(resolution = c(0.05, 0.1,0.2,0.4,0.5)) 
DimPlot(sce.Mye, group.by = "RNA_snn_res.0.2",label = F)

table(sce.Mye$group ,sce.Mye$RNA_snn_res.0.2)
#        0   1   2   3   4   5
#  MET   9   4  10 162 156   7
#  PT  588 399 205  21  19  35

2，计算OR值

由于do.tissueDist函数限定了meta.cluster = cellInfo.tbmeta.cluster, loc = cellInfo.tbloc, 为减少报错建议修改我们输入矩阵的名字来适配函数。

meta <- sce.Mye@meta.data
# 修改名字
meta$loc <- meta$group
meta$meta.cluster <- meta$RNA_snn_res.0.2
# 指定输出文件路径及前缀
out.prefix <- "./Fig_OR"

#主分析函数
OR.immune.list <- do.tissueDist(cellInfo.tb=meta,
                                out.prefix=sprintf("%s.Immune_cell",out.prefix),
                                pdf.width=4,pdf.height=8,verbose=1
)

结果存放在OR.immune.list的列表中，含有OR值以及对应的P值，提取对应的数据绘制可视化热图。

这就完成了真实数据的OR分析，受限细胞数和分组，本图不是很美观。

3，使用文献panT数据（图更好看）

文献中的int.CD8.S35.meta.tb.rds就是meta.data矩阵文件，和上面的是一样的，只是问了颜值高一点。

meta <- read_rds("int.CD8.S35.meta.tb.rds")
head(meta)

OR.immune.list <- do.tissueDist(cellInfo.tb=meta,
                                out.prefix=sprintf("%s.Immune_cell",out.prefix),
                                pdf.width=4,pdf.height=8,verbose=1
)

其中loc 和 meta.cluster均有，因此无需更改名字直接函数分析即可。

4，可视化

函数默认使用sscVis::plotMatrix.simple绘制，热图中没有P值的结果。前面提到结果存放在OR.immune.list 列表中，那么就可以分别提取OR结果和 p值结果，然后使用pheatmap自定义绘制热图或者其他可视化形式。

# a 存OR值结果
a=OR.immune.list[["OR.dist.tb"]]
a <- as.data.frame(a)
rownames(a) <- a$rid
a <- a[,-1]
a <- na.omit(a)
a

# b存P值结果
b <- OR.immune.list$count.dist.melt.ext.tb[,c(1,2,6)]
b <- spread(b,key = "cid", value = "adj.p.value")
b <- data.frame(b[,-1],row.names = b$rid)
b <- b[rownames(a),]
b

将P值改为*的展示形式，绘制热图展示P值结果。

考虑到OR值在文献中定义的0.5 和 1.5 值，这里设置bk参数。

col <- viridis(11,option = "D")
b = ifelse(b >= 0.05&(a>1.5|a<0.5), "",
           ifelse(b<0.0001&(a>1.5|a<0.5),"****",
                  ifelse(b<0.001&(a>1.5|a<0.5),"***",
                         ifelse(b<0.01&(a>1.5|a<0.5),"**",
                                ifelse(b < 0.05&(a>1.5|a<0.5),"*","")))))

bk=c(seq(0,0.99,by=0.01),seq(1,2,by=0.01))

pheatmap(a[,], border_color = "NA", fontsize = 9,cellheight = 12,cellwidth = 20,clustering_distance_rows="correlation",
         display_numbers = b,number_color="black",fontsize_number=10,
         cluster_col=F, cluster_rows=T, border= NULL, breaks=bk, treeheight_row = 20,treeheight_col = 20,
         color = c(colorRampPalette(colors = col[1:6])(length(bk)/2),
                   colorRampPalette(colors = col[6:11])(length(bk)/2)))