首页
学习
活动
专区
圈层
工具
发布
社区首页 >专栏 >空转联合单细胞分析(九):复现Nature子刊之Visium HD空转RCTD反卷积演示

空转联合单细胞分析(九):复现Nature子刊之Visium HD空转RCTD反卷积演示

作者头像
KS科研分享与服务-TS的美梦
发布2025-12-31 18:27:34
发布2025-12-31 18:27:34
30
举报

对于visium HD数据,没有执行细胞分割,bins其实也不能代表单个细胞,只是8um和16um的精度已经足够接近,也可以进行反卷积进行细胞注释。这里演示RCTD,和visium中的演示类似。详细信息参考https://mp.weixin.qq.com/s/y_05z-p0G5mUCp1lJA3Hew。同时,我们参考原文,构建人结直肠癌参考数据seurat object。

代码语言:javascript
复制
setwd("~/data_analysis/10X_space/CRC_visiumHD/")
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
#devtools::install_github("dmcable/spacexr", build_vignettes = FALSE)
library(spacexr)
library(Matrix)
library(stringr)

1、人结直肠癌参考数据集构建

同样的,需要构建一个参考数据集,数据和代码论文都提供了(https://www.nature.com/articles/s41588-025-02193-3#Sec8),因为作者提供了数据和可运行代码https://github.com/10XGenomics/HumanColonCancer_VisiumHD/blob/main/Methods/FlexSingleCell.R,所以得到的结果就是原文使用的结果。数据下载:https://www.10xgenomics.com/platforms/visium/product-family/dataset-human-crc。这个数据细胞来源于结直肠癌(CRC)组织样本中50µm厚的FFPE切片,共计5名患者,并包含其中3名患者的配对癌旁正常组织。用于本次分析的FFPE切片与Visium HD实验所用的组织切片为连续切片,以确保构建的单细胞参考图谱能够高度代表用于空间转录组分析的组织区域。这里按照原文的代码构建一份数据,也可以作为单细胞参考数据集或者学习数据集。10X官网提供的数据是cellranger aggr输出的矩阵结果。

代码语言:javascript
复制
wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_count_filtered_feature_bc_matrix.h5
wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_aggregation.csv
 wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_count_analysis.tar.gz
代码语言:javascript
复制
ColonFlex.data <- Read10X_h5('./CRC_GEX/HumanColonCancer_Flex_Multiplex_count_filtered_feature_bc_matrix.h5') 

现在读入的矩阵ColonFlex.data是所有样本的混合,需要对cellid区分sample及分组,生成metadata!用到的文件是cellranger aggr生成的HumanColonCancer_Flex_Multiplex_aggregation.csv。

代码语言:javascript
复制
# Read aggregation_csv.csv file to be used as MetaData ( Patient, etc)
MetaData<-read.csv('./CRC_GEX/HumanColonCancer_Flex_Multiplex_aggregation.csv')#读取aggregation的样本信息表
MetaData$Patient<-sapply(strsplit(MetaData$sample_id,"_"),function(X){return(X[6])})
MetaData$BC<-sapply(strsplit(MetaData$sample_id,"_"),function(X){return(X[7])})
MetaData$Condition<-gsub("P[0-9]","",MetaData$Patient)
Index<-as.numeric(sapply(strsplit(colnames(ColonFlex.data),"-"),function(X){return(X[2])}))
MetaData<-MetaData[Index,3:4]
MetaData$Barcode<-colnames(ColonFlex.data)
rownames(MetaData)<-MetaData$Barcode

创建seurat obj,同时添加metadata。计算线粒体基因比例,用于质控。这里作者没有再跑UMAP,使用的是cellranger aggr分析的UMAP结果,文件在HumanColonCancer_Flex_Multiplex_count_analysis.tar.gz。

代码语言:javascript
复制
# Cell Ranger Results
UMAP<-read.csv('./CRC_GEX/analysis/umap/gene_expression_2_components/projection.csv')
MetaData<-cbind(MetaData,UMAP[match(MetaData$Barcode,UMAP$Barcode),2:3])
代码语言:javascript
复制
# Create Seurat Object
ColonCancer_Flex<-CreateSeuratObject(ColonFlex.data,meta.data = MetaData)
代码语言:javascript
复制
# Add % MT
ColonCancer_Flex[["MT.percent"]] <- PercentageFeatureSet(ColonCancer_Flex, pattern = "^MT-")
代码语言:javascript
复制
# Add UMAP projection
ColonCancer_Flex[["umap"]] <- CreateDimReducObject(embeddings = as.matrix(MetaData[,c("UMAP.1","UMAP.2")]), key = "UMAP_", assay = DefaultAssay(ColonCancer_Flex))

数据质控:

代码语言:javascript
复制
# UMI and Gene Threshold (by removing 5%)
UMI_TH<-quantile(ColonCancer_Flex$nCount_RNA,c(0.025,0.975))
Gene_TH<-quantile(ColonCancer_Flex$nFeature_RNA,c(0.025,0.975))
UMI_TH
Gene_TH
代码语言:javascript
复制
##    2.5%   97.5% 
##   311.0 16172.8
代码语言:javascript
复制
##   2.5%  97.5% 
##  242.0 5275.8
代码语言:javascript
复制
# Add variable with QC filter status
ColonCancer_Flex$QCFilter<-ifelse(ColonCancer_Flex$MT.percent < 25 & 
                                    ColonCancer_Flex$nCount_RNA > UMI_TH[1] & ColonCancer_Flex$nCount_RNA < UMI_TH[2] & 
                                    ColonCancer_Flex$nFeature_RNA > Gene_TH[1] & ColonCancer_Flex$nFeature_RNA < Gene_TH[2],"Keep","Remove")
代码语言:javascript
复制
# Remove bcs that failed QC
ColonCancer_Flex<-subset(ColonCancer_Flex,cells=colnames(ColonCancer_Flex)[ColonCancer_Flex$QCFilter=="Keep"])

因为数据很大,所以这里作者也是采取了sketch下采样的方式进行聚类分析。后续再映射回所有细胞,能够有效节约分析时间及空间。

代码语言:javascript
复制
# Seurat V5 Processing (We will sketch and then project) at ~ 15% of the full data
ColonCancer_Flex <- NormalizeData(ColonCancer_Flex)
ColonCancer_Flex <- FindVariableFeatures(ColonCancer_Flex)
ColonCancer_Flex <- SketchData(object = ColonCancer_Flex,ncells = 37000,
                               method = "LeverageScore",sketched.assay = "sketch")
代码语言:javascript
复制
DefaultAssay(ColonCancer_Flex) <- "sketch"
ColonCancer_Flex <- FindVariableFeatures(ColonCancer_Flex)
ColonCancer_Flex <- ScaleData(ColonCancer_Flex)
ColonCancer_Flex <- RunPCA(ColonCancer_Flex)
代码语言:javascript
复制
ElbowPlot(ColonCancer_Flex,ndims=40)
代码语言:javascript
复制
ColonCancer_Flex <- FindNeighbors(ColonCancer_Flex, dims = 1:25)
ColonCancer_Flex <- FindClusters(ColonCancer_Flex, resolution = 0.6)


# Project to Full dataset
ColonCancer_Flex <- Seurat:::ProjectData(
  object = ColonCancer_Flex,
  assay = "RNA",
  full.reduction = "pca.full",
  sketched.assay = "sketch",
  sketched.reduction = "pca",
  umap.model = "umap",
  dims = 1:50,
  refdata = list(seurat_cluster.projected = "seurat_clusters")
)
代码语言:javascript
复制
DimPlot(ColonCancer_Flex,reduction = 'umap',group.by = 'seurat_cluster.projected')+
  DimPlot(ColonCancer_Flex,reduction = 'umap',group.by = 'Patient')

细胞注释:

代码语言:javascript
复制
Idents(ColonCancer_Flex) <- 'seurat_cluster.projected'
library(plyr)
代码语言:javascript
复制
ColonCancer_Flex@meta.data$level1 <- mapvalues(ColonCancer_Flex@meta.data$seurat_cluster.projected,
                                          from=c("0","1","2","3","4","5",
                                                 "6","7","8","9","10",
                                                 "11","12","13","14","15",
                                                 "16","17","18","19",
                                                 "20","21","22","23","24"),
                                          to=c("Tumor","Tumor","Myeloid","Smooth Muscle","Intestinal Epithelial","Fibroblast",
                                                 "Smooth Muscle","Fibroblast","Endothelial","T cells","T cells",
                                                 "B cells","B cells","Fibroblast","Intestinal Epithelial","Neuronal",
                                                 "Myeloid","Fibroblast","Myeloid","Neuronal",
                                                 "Endothelial","Myeloid","Neuronal","Smooth Muscle","Intestinal Epithelial"))
Idents(ColonCancer_Flex) <- ColonCancer_Flex@meta.data$level1
代码语言:javascript
复制
DimPlot(ColonCancer_Flex,reduction = 'umap',label = T,repel = T)

这里对于数据只进行了大类的注释,原文还进行了更佳详细的注释,我们不再演示,但是对于反卷积,细胞注释越详细越好。原文中作者是将各个celltype提取出来分别进行了重新聚类,降维使用的还是原先大群的降维,然后进行的细胞注释。

2、RCTD反卷积

反卷积与visium中的演示是一样的,visium HD只是精度更高了而已,其他的没有特殊流程。prepare reference:

代码语言:javascript
复制
#对于单细胞转录组数据,选取与空转对应的两例
ColonCancer <- ColonCancer_Flex[,ColonCancer_Flex$Patient %in% c("P5CRC","P5NAT")]
Idents(ColonCancer) <- 'level1'
#counts
counts  = GetAssayData(ColonCancer,assay = 'RNA',layer = 'counts')
#celltype
cluster <- as.factor(ColonCancer$level1)
#nUMI
nUMI <- ColonCancer$nCount_RNA
代码语言:javascript
复制
# create the RCTD reference object
reference <- Reference(counts, cluster, nUMI)

preparing for spatail data:我们的空转数据是整合的,这里分开单独进行反卷积。直接使用原始数据,由于visiumHD bins超级多,会非常耗费时间精力,可以采取seurat官网的策略,将数据sketch后进行反卷积,然后再映射回去。这里测试一个样本的反卷积,看看运行能力!

代码语言:javascript
复制
#counts
ST_counts <- GetAssayData(CRC_merge,assay = 'Spatial',layer = 'counts.1')
#coords
coords <- GetTissueCoordinates(CRC_merge,image = 'CRC')[,1:2]
#nUMI
metadata = subset(CRC_merge@meta.data, group=='CRC')#提取每个sample的metadata
ST_nUMI <- metadata$nCount_Spatial#获取UMI
names(ST_nUMI) <- rownames(metadata)

# create the RCTD query object
query <- SpatialRNA(coords = coords, counts = ST_counts, nUMI = ST_nUMI)

运行RCTD:对于visium HD数据,doublet_mode选择”doublet”。

代码语言:javascript
复制
RCTD <- create.RCTD(query, reference, max_cores = 10)
RCTD <- run.RCTD(RCTD, doublet_mode = "doublet")

提取反卷积的CRC seurat obj,将反卷积结果添加到seurat:

代码语言:javascript
复制
load("~/data_analysis/10X_space/CRC_visiumHD/CRC_merge.RData")
CRC_obj <- subset(CRC_merge,group=='CRC')

3、结果可视化

添加结果到seurat

代码语言:javascript
复制
load("~/data_analysis/10X_space/CRC_visiumHD/RCTD_HD.RData")
# add results back to Seurat object
CRC_obj <- AddMetaData(CRC_obj, metadata = RCTD@results$results_df)
代码语言:javascript
复制
# add results back to Seurat object
CRC_obj <- AddMetaData(CRC_obj, metadata = RCTD@results$weights)
代码语言:javascript
复制
SpatialFeaturePlot(CRC_obj,
                   features =  colnames(RCTD@results$weights),
                   ncol = 3,
                   images = 'CRC')&
  theme_bw()&
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())

对于visium HD数据,看看first_type反卷积注释结果,对比一下cluster聚类结果,不同的celltype其实区分还是很明显,基本与clsuter是一致的(空转联合单细胞分析(八):Visium HD多样本整合分析(基于seurat))。

代码语言:javascript
复制
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628",
   "#F781BF","#8DD3C7")
SpatialDimPlot(CRC_obj, 
               group.by = 'first_type', 
               pt.size.factor = 8,
               images = 'CRC') +
  scale_fill_manual(values = color_pal)
代码语言:javascript
复制
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628",
   "#F781BF","#999999","#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3",
   "#FDB462", "#B3DE69","#FCCDE5","#CCEBC5","#D95F02","#003366","#1B9E77",
   "#7570B3","#E7298A","#66A61E","#E6AB02","#FFC0CB")
SpatialDimPlot(CRC_obj, 
               group.by = 'seurat_cluster.projected', 
               pt.size.factor = 8,
               images = 'CRC') +
  scale_fill_manual(values = color_pal)+
  guides(fill=guide_legend(ncol=2))

END!

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2025-12-18,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 KS科研分享与服务 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 1、人结直肠癌参考数据集构建
  • 2、RCTD反卷积
  • 3、结果可视化
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档