
对于visium HD数据,没有执行细胞分割,bins其实也不能代表单个细胞,只是8um和16um的精度已经足够接近,也可以进行反卷积进行细胞注释。这里演示RCTD,和visium中的演示类似。详细信息参考https://mp.weixin.qq.com/s/y_05z-p0G5mUCp1lJA3Hew。同时,我们参考原文,构建人结直肠癌参考数据seurat object。
setwd("~/data_analysis/10X_space/CRC_visiumHD/")
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
#devtools::install_github("dmcable/spacexr", build_vignettes = FALSE)
library(spacexr)
library(Matrix)
library(stringr)同样的,需要构建一个参考数据集,数据和代码论文都提供了(https://www.nature.com/articles/s41588-025-02193-3#Sec8),因为作者提供了数据和可运行代码https://github.com/10XGenomics/HumanColonCancer_VisiumHD/blob/main/Methods/FlexSingleCell.R,所以得到的结果就是原文使用的结果。数据下载:https://www.10xgenomics.com/platforms/visium/product-family/dataset-human-crc。这个数据细胞来源于结直肠癌(CRC)组织样本中50µm厚的FFPE切片,共计5名患者,并包含其中3名患者的配对癌旁正常组织。用于本次分析的FFPE切片与Visium HD实验所用的组织切片为连续切片,以确保构建的单细胞参考图谱能够高度代表用于空间转录组分析的组织区域。这里按照原文的代码构建一份数据,也可以作为单细胞参考数据集或者学习数据集。10X官网提供的数据是cellranger aggr输出的矩阵结果。
wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_count_filtered_feature_bc_matrix.h5
wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_aggregation.csv
wget https://cf.10xgenomics.com/samples/cell-exp/8.0.0/HumanColonCancer_Flex_Multiplex/HumanColonCancer_Flex_Multiplex_count_analysis.tar.gzColonFlex.data <- Read10X_h5('./CRC_GEX/HumanColonCancer_Flex_Multiplex_count_filtered_feature_bc_matrix.h5') 现在读入的矩阵ColonFlex.data是所有样本的混合,需要对cellid区分sample及分组,生成metadata!用到的文件是cellranger aggr生成的HumanColonCancer_Flex_Multiplex_aggregation.csv。
# Read aggregation_csv.csv file to be used as MetaData ( Patient, etc)
MetaData<-read.csv('./CRC_GEX/HumanColonCancer_Flex_Multiplex_aggregation.csv')#读取aggregation的样本信息表
MetaData$Patient<-sapply(strsplit(MetaData$sample_id,"_"),function(X){return(X[6])})
MetaData$BC<-sapply(strsplit(MetaData$sample_id,"_"),function(X){return(X[7])})
MetaData$Condition<-gsub("P[0-9]","",MetaData$Patient)
Index<-as.numeric(sapply(strsplit(colnames(ColonFlex.data),"-"),function(X){return(X[2])}))
MetaData<-MetaData[Index,3:4]
MetaData$Barcode<-colnames(ColonFlex.data)
rownames(MetaData)<-MetaData$Barcode创建seurat obj,同时添加metadata。计算线粒体基因比例,用于质控。这里作者没有再跑UMAP,使用的是cellranger aggr分析的UMAP结果,文件在HumanColonCancer_Flex_Multiplex_count_analysis.tar.gz。
# Cell Ranger Results
UMAP<-read.csv('./CRC_GEX/analysis/umap/gene_expression_2_components/projection.csv')
MetaData<-cbind(MetaData,UMAP[match(MetaData$Barcode,UMAP$Barcode),2:3])# Create Seurat Object
ColonCancer_Flex<-CreateSeuratObject(ColonFlex.data,meta.data = MetaData)# Add % MT
ColonCancer_Flex[["MT.percent"]] <- PercentageFeatureSet(ColonCancer_Flex, pattern = "^MT-")# Add UMAP projection
ColonCancer_Flex[["umap"]] <- CreateDimReducObject(embeddings = as.matrix(MetaData[,c("UMAP.1","UMAP.2")]), key = "UMAP_", assay = DefaultAssay(ColonCancer_Flex))数据质控:
# UMI and Gene Threshold (by removing 5%)
UMI_TH<-quantile(ColonCancer_Flex$nCount_RNA,c(0.025,0.975))
Gene_TH<-quantile(ColonCancer_Flex$nFeature_RNA,c(0.025,0.975))
UMI_TH
Gene_TH## 2.5% 97.5%
## 311.0 16172.8## 2.5% 97.5%
## 242.0 5275.8# Add variable with QC filter status
ColonCancer_Flex$QCFilter<-ifelse(ColonCancer_Flex$MT.percent < 25 &
ColonCancer_Flex$nCount_RNA > UMI_TH[1] & ColonCancer_Flex$nCount_RNA < UMI_TH[2] &
ColonCancer_Flex$nFeature_RNA > Gene_TH[1] & ColonCancer_Flex$nFeature_RNA < Gene_TH[2],"Keep","Remove")# Remove bcs that failed QC
ColonCancer_Flex<-subset(ColonCancer_Flex,cells=colnames(ColonCancer_Flex)[ColonCancer_Flex$QCFilter=="Keep"])因为数据很大,所以这里作者也是采取了sketch下采样的方式进行聚类分析。后续再映射回所有细胞,能够有效节约分析时间及空间。
# Seurat V5 Processing (We will sketch and then project) at ~ 15% of the full data
ColonCancer_Flex <- NormalizeData(ColonCancer_Flex)
ColonCancer_Flex <- FindVariableFeatures(ColonCancer_Flex)
ColonCancer_Flex <- SketchData(object = ColonCancer_Flex,ncells = 37000,
method = "LeverageScore",sketched.assay = "sketch")DefaultAssay(ColonCancer_Flex) <- "sketch"
ColonCancer_Flex <- FindVariableFeatures(ColonCancer_Flex)
ColonCancer_Flex <- ScaleData(ColonCancer_Flex)
ColonCancer_Flex <- RunPCA(ColonCancer_Flex)ElbowPlot(ColonCancer_Flex,ndims=40)
ColonCancer_Flex <- FindNeighbors(ColonCancer_Flex, dims = 1:25)
ColonCancer_Flex <- FindClusters(ColonCancer_Flex, resolution = 0.6)
# Project to Full dataset
ColonCancer_Flex <- Seurat:::ProjectData(
object = ColonCancer_Flex,
assay = "RNA",
full.reduction = "pca.full",
sketched.assay = "sketch",
sketched.reduction = "pca",
umap.model = "umap",
dims = 1:50,
refdata = list(seurat_cluster.projected = "seurat_clusters")
)DimPlot(ColonCancer_Flex,reduction = 'umap',group.by = 'seurat_cluster.projected')+
DimPlot(ColonCancer_Flex,reduction = 'umap',group.by = 'Patient')
细胞注释:
Idents(ColonCancer_Flex) <- 'seurat_cluster.projected'
library(plyr)ColonCancer_Flex@meta.data$level1 <- mapvalues(ColonCancer_Flex@meta.data$seurat_cluster.projected,
from=c("0","1","2","3","4","5",
"6","7","8","9","10",
"11","12","13","14","15",
"16","17","18","19",
"20","21","22","23","24"),
to=c("Tumor","Tumor","Myeloid","Smooth Muscle","Intestinal Epithelial","Fibroblast",
"Smooth Muscle","Fibroblast","Endothelial","T cells","T cells",
"B cells","B cells","Fibroblast","Intestinal Epithelial","Neuronal",
"Myeloid","Fibroblast","Myeloid","Neuronal",
"Endothelial","Myeloid","Neuronal","Smooth Muscle","Intestinal Epithelial"))
Idents(ColonCancer_Flex) <- ColonCancer_Flex@meta.data$level1DimPlot(ColonCancer_Flex,reduction = 'umap',label = T,repel = T)
这里对于数据只进行了大类的注释,原文还进行了更佳详细的注释,我们不再演示,但是对于反卷积,细胞注释越详细越好。原文中作者是将各个celltype提取出来分别进行了重新聚类,降维使用的还是原先大群的降维,然后进行的细胞注释。
反卷积与visium中的演示是一样的,visium HD只是精度更高了而已,其他的没有特殊流程。prepare reference:
#对于单细胞转录组数据,选取与空转对应的两例
ColonCancer <- ColonCancer_Flex[,ColonCancer_Flex$Patient %in% c("P5CRC","P5NAT")]
Idents(ColonCancer) <- 'level1'
#counts
counts = GetAssayData(ColonCancer,assay = 'RNA',layer = 'counts')
#celltype
cluster <- as.factor(ColonCancer$level1)
#nUMI
nUMI <- ColonCancer$nCount_RNA# create the RCTD reference object
reference <- Reference(counts, cluster, nUMI)preparing for spatail data:我们的空转数据是整合的,这里分开单独进行反卷积。直接使用原始数据,由于visiumHD bins超级多,会非常耗费时间精力,可以采取seurat官网的策略,将数据sketch后进行反卷积,然后再映射回去。这里测试一个样本的反卷积,看看运行能力!
#counts
ST_counts <- GetAssayData(CRC_merge,assay = 'Spatial',layer = 'counts.1')
#coords
coords <- GetTissueCoordinates(CRC_merge,image = 'CRC')[,1:2]
#nUMI
metadata = subset(CRC_merge@meta.data, group=='CRC')#提取每个sample的metadata
ST_nUMI <- metadata$nCount_Spatial#获取UMI
names(ST_nUMI) <- rownames(metadata)
# create the RCTD query object
query <- SpatialRNA(coords = coords, counts = ST_counts, nUMI = ST_nUMI)运行RCTD:对于visium HD数据,doublet_mode选择”doublet”。
RCTD <- create.RCTD(query, reference, max_cores = 10)
RCTD <- run.RCTD(RCTD, doublet_mode = "doublet")提取反卷积的CRC seurat obj,将反卷积结果添加到seurat:
load("~/data_analysis/10X_space/CRC_visiumHD/CRC_merge.RData")
CRC_obj <- subset(CRC_merge,group=='CRC')添加结果到seurat
load("~/data_analysis/10X_space/CRC_visiumHD/RCTD_HD.RData")
# add results back to Seurat object
CRC_obj <- AddMetaData(CRC_obj, metadata = RCTD@results$results_df)# add results back to Seurat object
CRC_obj <- AddMetaData(CRC_obj, metadata = RCTD@results$weights)SpatialFeaturePlot(CRC_obj,
features = colnames(RCTD@results$weights),
ncol = 3,
images = 'CRC')&
theme_bw()&
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
对于visium HD数据,看看first_type反卷积注释结果,对比一下cluster聚类结果,不同的celltype其实区分还是很明显,基本与clsuter是一致的(空转联合单细胞分析(八):Visium HD多样本整合分析(基于seurat))。
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628",
"#F781BF","#8DD3C7")
SpatialDimPlot(CRC_obj,
group.by = 'first_type',
pt.size.factor = 8,
images = 'CRC') +
scale_fill_manual(values = color_pal)
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628",
"#F781BF","#999999","#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3",
"#FDB462", "#B3DE69","#FCCDE5","#CCEBC5","#D95F02","#003366","#1B9E77",
"#7570B3","#E7298A","#66A61E","#E6AB02","#FFC0CB")
SpatialDimPlot(CRC_obj,
group.by = 'seurat_cluster.projected',
pt.size.factor = 8,
images = 'CRC') +
scale_fill_manual(values = color_pal)+
guides(fill=guide_legend(ncol=2))
END!