我注意到这个R包也提供了一个整合单细胞和空转数据的算法NNLS
(https://ludvigla.github.io/semla/articles/cell_type_mapping_with_NNLS.html)。因此,本文对这个算法进行测试。
github在https://github.com/ludvigla/semla
官方教程在:https://ludvigla.github.io/semla/articles/cell_type_mapping_with_NNLS.html
安装:
remotes::install_github("ludvigla/semla")
library(readr)
library(semla)
library(tibble)
library(dplyr)
library(Seurat)
library(purrr)
library(patchwork)
library(ggplot2)
#install.packages('RcppML')
library(RcppML)
# 空转
brain_st_cortex <- read_rds("./Rawdata/brain_st_cortex.rds")
# 单细胞
brain_sc <- read_rds("./Rawdata/brain_sc.rds")
## Rename the cells/spots with syntactically valid names
brain_st_cortex <- RenameCells(brain_st_cortex, new.names=make.names(Cells(brain_st_cortex)))
brain_sc <- RenameCells(brain_sc, new.names=make.names(Cells(brain_sc)))
可视化空转数据:
## Visualize the ST data
SpatialDimPlot(brain_st_cortex)
image-20231105144510038
可视化单细胞数据:
## Visualize the scRNA-seq data
DimPlot(brain_sc, label = T, label.size = 4.5, group.by = "cell_type")
image-20231105144526583
这里,我们将对10x Visium数据和单细胞数据应用相同的对数归一化处理流程。这里将高变基因的数量设置得非常高,因为稍后我们将对单细胞数据中的高变基因与10x Visium数据中的高变基因取交集。
由于NNLS算法运行非常快,因此不需要像常规处理那样只取2000个高变基因。相反,我们可以使用在单细胞和10x Visium数据中存在交集的所有基因。
# Normalize data and find variable features for Visium data
brain_st_cortex <- brain_st_cortex |>
NormalizeData() |>
FindVariableFeatures(nfeatures = 10000)
# Normalize data and run vanilla analysis to create UMAP embedding
brain_sc <- brain_sc |>
NormalizeData() |>
FindVariableFeatures() |>
ScaleData() |>
RunPCA() |>
RunUMAP(reduction = "pca", dims = 1:30)
# Rerun FindVariableFeatures to increase the number before cell type deconvolution
brain_sc <- brain_sc |>
FindVariableFeatures(nfeatures = 10000)
使用RunNNLS()
函数,输入单细胞数据和空转数据,指定单细胞细胞类型的标签名称,即可运行NNLS反卷积:
DefaultAssay(brain_st_cortex) <- "Spatial"
ti <- Sys.time()
brain_st_cortex <- RunNNLS(object = brain_st_cortex,
singlecell_object = brain_sc,
groups = "cell_type",
minCells_per_celltype = 10)
# ── Predicting cell type proportions ──
#
# ℹ Fetching data from Seurat objects
# → Filtering out features that are only present in one data set
# → Kept 2961 features for deconvolution
# ℹ Preparing data for NNLS
# → Downsampling scRNA-seq data to include a maximum of 50 cells per cell type
# → Kept 15 cell types after filtering
# → Calculating cell type expression profiles
# ℹ Predicting cell type proportions with NNLS for 15 cell types
# ℹ Returning results in a new 'Assay' named 'celltypeprops'
# ℹ Setting default assay to 'celltypeprops'
# ✔ Finished
sprintf("RunNNLS completed in %s seconds", round(Sys.time() - ti, digits = 2))
#"RunNNLS completed in 0.91 seconds"
1秒钟不到就运行结束了。
注意:这个函数默认会过滤掉小于10个细胞的细胞类型,我们可以指定minCells_per_celltype =
参数进行设置。
# Check available cell types
rownames(brain_st_cortex)
# [1] "Astro" "Endo" "L2/3 IT" "L4" "L5 IT" "L5 PT" "L6 CT" "L6 IT" "L6b" "Lamp5" "NP" "Pvalb"
# [13] "Sncg" "Sst" "Vip"
# Plot selected cell types
DefaultAssay(se_brain_spatial) <- "celltypeprops"
selected_celltypes <- c('L6 CT', 'L5 IT', 'L6b', 'L2/3 IT', 'L4', 'Astro')
plots <- lapply(seq_along(selected_celltypes), function(i) {
SpatialFeaturePlot(brain_st_cortex,features = selected_celltypes[1]) +
scale_fill_gradientn(colours = RColorBrewer::brewer.pal(n = 9, name = "Spectral") %>% rev())+
plot_layout(guides = "collect") &
theme(legend.position = "right", legend.margin = margin(b = 50),
legend.text = element_text(angle = 0),
plot.title = element_blank())
}) %>% setNames(nm = selected_celltypes)
wrap_plots(plots, ncol=4)
image-20240419182522039
这里我们对比一下cell2location的结果【整合单细胞和空转数据多种方法之Cell2location】:
image-20231228230033246
就肉眼来看,结果还是挺一致的。
这里使用NNLS和cell2location的结果做一个基于spot的相关性分析:
library(ggpubr)
cell2locat_res = read.csv("./Rawdata/st_cell2location_res.csv", row.names = 1, check.names = F)
head(cell2locat_res)
table(row.names(cell2locat_res) == colnames(brain_st_cortex))
# TRUE
# 1075
input.plot = data.frame(L6CT_cell2locat = cell2locat_res$`q05cell_abundance_w_sf_L6 CT`,
L6CT_NNSL = as.numeric(brain_st_cortex@assays$celltypeprops["L6 CT",]*100))
ggscatter(input.plot, cor.method = "p",
x = "L6CT_cell2locat", y = "L6CT_NNSL",
title = "L6 CT cells",
add = "reg.line", conf.int = TRUE,
add.params = list(fill = "lightgray"))+
stat_cor(method = "p",
label.sep = "\n",
label.y.npc = "top",
label.x = 10,
label.y = 1)
image-20240419203539900
input.plot = data.frame(L23ICT_cell2locat = cell2locat_res$`q05cell_abundance_w_sf_L2/3 IT`,
L23ICT_NNSL = as.numeric(brain_st_cortex@assays$celltypeprops["L2/3 IT",]* 100))
ggscatter(input.plot, cor.method = "p",
x = "L23ICT_cell2locat", y = "L23ICT_NNSL",
title = "L2/3 IT cells",
add = "reg.line", conf.int = TRUE,
add.params = list(fill = "lightgray"))+
stat_cor(method = "p",
label.sep = "\n",
label.y.npc = "top",
label.x = 20,
label.y = 2)
image-20240419203624188
input.plot = data.frame(L4_cell2locat = cell2locat_res$q05cell_abundance_w_sf_L4,
L4_NNSL = as.numeric(brain_st_cortex@assays$celltypeprops["L4",]*100))
ggscatter(input.plot, cor.method = "p",
x = "L4_cell2locat", y = "L4_NNSL",
title = "L4 cells",
add = "reg.line", conf.int = TRUE,
add.params = list(fill = "lightgray"))+
stat_cor(method = "p",
label.sep = "\n",
label.y.npc = "top",
label.x = 16,
label.y = 1)
image-20240419203656828
library(ggpubr)
input.plot = data.frame(Astro_cell2locat = cell2locat_res$q05cell_abundance_w_sf_Astro,
Astro_NNSL = as.numeric(brain_st_cortex@assays$celltypeprops["Astro",]*100))
ggscatter(input.plot, cor.method = "p",
x = "Astro_cell2locat", y = "Astro_NNSL",
title = "Astro cells",
add = "reg.line", conf.int = TRUE,
add.params = list(fill = "lightgray"))+
stat_cor(method = "p",
label.sep = "\n",
label.y.npc = "top",
label.x = 7,
label.y = 1)
image-20240419203731108
尽管两种算法的相关性非常好,但是可以看到NNSL计算得到的每种细胞类型的结果区间(区间比较大)均大于cell2location算法的结果区间。
我们还可以使用semla的内置函数MapMultipleFeatures()
在同一张slide里可视化多种细胞类型的分布情况(cell2location也有类似功能),但是需要先将seurat对象转换为semla需要的对象,这点我在【空转可视化R包semla(一)入门介绍】介绍过:
semla.data <- UpdateSeuratForSemla(brain_st_cortex)
# Plot multiple features
MapMultipleFeatures(semla.data,
pt_size = 2, max_cutoff = 0.99,
override_plot_dims = TRUE,
colors = c("#332288", "#88CCEE", "#44AA99", "#117733", "#DDCC77", "#CC6677"),
features = selected_celltypes) +
plot_layout(guides = "collect")
image-20240419183205407
semla还提供了细胞类型的共定位分析可视化函数。通过计算不同细胞类型之间的成对相关性,我们可以了解哪些细胞类型经常出现在相同的spot上:
cor_matrix <- FetchData(semla.data, selected_celltypes) |>
mutate_all(~ if_else(.x<0.1, 0, .x)) |> # Filter lowest values (-> set as 0)
cor()
diag(cor_matrix) <- NA
max_val <- max(cor_matrix, na.rm = T)
cols <- RColorBrewer::brewer.pal(7, "RdYlBu") |> rev(); cols[4] <- "white"
pheatmap::pheatmap(cor_matrix,
breaks = seq(-max_val, max_val, length.out = 100),
color=colorRampPalette(cols)(100),
cellwidth = 14, cellheight = 14,
treeheight_col = 10, treeheight_row = 10,
main = "Cell type correlation\nwithin spots")
image-20240419183405325
总的来说,NNLS解卷积算法运行速度非常快。R包semla还提供了一些额外的可视化方案,但NNLS解卷积算法的精确度尚待进一步探讨(特别是需要和其他解卷积算法进行benchmark分析)。
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有