CellTrek是一个空间分析工具包,可以做单细胞和空间的整合分析、空间细胞共定位和空间基因表达分析。
它的共定位分析可以找到空间上距离相互靠近的细胞,并以网络图的形式呈现。
官方教程中,共定位分析需要依赖整合单细胞和空间数据之后的celltrek对象,本文则只使用空间数据进行共定位分析。同时官方提供了一个shiny程序用于展示共定位网络结构,交互性很好,但是无法导出图片,本文则使用igraph从头绘制共定位网络图。
仍以10X的xenium breast官方数据为例,下载后解压。
# read xenium gene expression data
xenium_expr <- Read10X_h5("Xenium_V1_FFPE_Human_Breast_IDC/cell_feature_matrix.h5")
xenium <- CreateSeuratObject(
xenium_expr$`Gene Expression`,
min.cells = 10,
min.features = 10
) %>%
RenameCells(add.cell.id = "cell")
# read cell coords data
# 3 column should exists to run CellTrek::scoloc analysis
# coord_x, coord_y, id_new
coords_xenium <- arrow::read_parquet("Xenium_V1_FFPE_Human_Breast_IDC/cells.parquet")
meta_coords <-
coords_xenium %>%
dplyr::select(
cell_id,
coord_x = x_centroid,
coord_y = y_centroid
) %>%
mutate(cell_id = str_c("cell", cell_id, sep = "_")) %>%
mutate(id_new = cell_id) %>%
column_to_rownames("cell_id")
xenium <- xenium %>% AddMetaData(meta_coords)
构造模拟的组别和细胞注释,这里只是示意,正常情况下,group是分析组别,annotation是注释好的细胞类型。
set.seed(1234)
xenium$group <- sample(LETTERS[1:3], size = ncol(xenium), replace = TRUE)
xenium$annotation <- sample(str_c("Cell", LETTERS[1:10], sep = "_"), size = ncol(xenium), replace = TRUE)
展示一下此时的数据,为了提高绘图速度,只是用10%的细胞绘制空间原位图:
# spatial plot
xenium[[]] %>%
dplyr::slice_sample(prop = 0.1) %>%
ggplot(aes(x = coord_x, y = coord_y)) +
geom_tile(aes(fill = group), height = 20, width = 20) +
coord_equal() +
ggsci::scale_fill_aaas() +
theme_void()
xenium[[]] %>%
dplyr::slice_sample(prop = 0.1) %>%
ggplot(aes(x = coord_x, y = coord_y)) +
geom_tile(aes(fill = annotation), height = 20, width = 20) +
coord_equal() +
ggsci::scale_fill_igv() +
theme_void()
# run CellTrek analysis sequentially for the three groups
all_data_grp_ls <- xenium %>% SplitObject(split.by = "group")
# run celltrek
all_celltrek_ls <-
all_data_grp_ls %>%
map(function(sc){ # browser()
graph_KL <-
CellTrek::scoloc(
sc,
col_cell ='annotation',
use_method ='KL',
h = 10,
eps = 1e-50
)
cells <- sc$annotation %>% unique()
names(cells) <- cells %>% make.names()
## We extract the minimum spanning tree (MST) result from the graph
graph_KL_mst_cons <- graph_KL$mst_cons
rownames(graph_KL_mst_cons) <-
colnames(graph_KL_mst_cons) <-
cells[colnames(graph_KL_mst_cons)]
## We then extract the metadata (including cell types and their frequencies)
cell_class <- sc[[]] %>%
group_by(annotation) %>%
summarise(n = n()) %>%
as.data.frame() %>%
dplyr::rename(id = annotation)
list(
mst = graph_KL_mst_cons,
cell_class = cell_class
)
})
# celltrek network can be visualized by scoloc_vis
# group A:
CellTrek::scoloc_vis(all_celltrek_lsAmst, meta_data = all_data_grp_ls
# plot celltrek network by igraph
cell_colors <- ggsci::pal_igv()(10)
p_all_celltrek_ls <-
all_celltrek_ls %>%
map(function(ct){
graph <-
ct$mst %>%
as.data.frame() %>%
rownames_to_column('from') %>%
pivot_longer(-from, names_to = "to", values_to = "weight") %>%
dplyr::filter(weight > 0.2) %>%
igraph::graph_from_data_frame() %>%
tidygraph::as_tbl_graph()
set.seed(12345)
p <-
graph %>%
tidygraph::activate(nodes) %>%
left_join(ct$cell_class, by = c(name = 'id')) %>%
ggraph::ggraph(layout = 'fr') + #, weights = weight) +
ggraph::geom_edge_link(aes(width = weight), color = 'gray', show.legend = FALSE) +
ggraph::geom_node_point(aes(size = n, color = name), show.legend = FALSE) +
ggraph::geom_node_text(aes(label = name), color = 'gray10', nudge_y= -0.2) +
ggraph::scale_edge_width(range = c(1, 4)) +
scale_size(range = c(4, 12)) +
# ggsci::scale_color_igv() +
scale_color_manual(values = cell_colors) +
coord_fixed(clip = 'off') +
theme_void()
p
})
celltrek网络图入下: