今天出了半天门诊,什么病都有,还是挺累的。🙃
还有个病人状态不行,转去了ICU。🫠
人生无常,大肠包小肠啊。😧
今天我们讲讲如何将查询数据集映射到参考数据集上。🤩
共嵌入(Co-embedding)用于比较相似的数据集,以识别相似性和差异性,并在细胞间传输注释。🤓
# Run this walkthrough in a fresh R session (restart R) instead of calling
# rm(list = ls()): that call only deletes global variables, leaves attached
# packages and options untouched, and silently destroys the user's workspace.
library(tidyverse)
library(monocle3)
library(Matrix)
加载参考数据集。😘
# Reference data set: expression matrix read with Matrix::readMM() plus
# tab-separated per-cell (coldata) and per-gene (rowdata) annotation tables.
matrix_ref <- readMM("./cao.mouse_embryo.sample.mtx.gz")
# The former `text=T` argument is dropped: `text` expects a character string
# holding the data itself and is ignored whenever a file is supplied.
cell_ann_ref <- read.csv("./cao.mouse_embryo.sample.coldata.txt.gz", sep = '\t')
gene_ann_ref <- read.csv("./cao.mouse_embryo.sample.rowdata.txt.gz", sep = '\t')
# Bundle matrix and annotations into one monocle3 cell_data_set.
cds_ref <- new_cell_data_set(matrix_ref,
                             cell_metadata = cell_ann_ref,
                             gene_metadata = gene_ann_ref)
加载查询数据集。🙊
# Query data set: expression matrix read with Matrix::readMM() plus
# tab-separated per-cell (coldata) and per-gene (rowdata) annotation tables.
matrix_qry <- readMM("./srivatsan.mouse_embryo_scispace.sample.mtx.gz")
# The former `text=T` argument is dropped: `text` expects a character string
# holding the data itself and is ignored whenever a file is supplied.
cell_ann_qry <- read.csv("./srivatsan.mouse_embryo_scispace.sample.coldata.txt.gz", sep = '\t')
gene_ann_qry <- read.csv("./srivatsan.mouse_embryo_scispace.sample.rowdata.txt.gz", sep = '\t')
# Bundle matrix and annotations into one monocle3 cell_data_set.
cds_qry <- new_cell_data_set(matrix_qry,
                             cell_metadata = cell_ann_qry,
                             gene_metadata = gene_ann_qry)
比对的话必须是具有相同的基因的,所以不是交集基因的部分需要删除掉。😀
参考数据集基因。🧬
# Row names of the reference cell_data_set, used here as its gene identifiers.
genes_ref <- row.names(cds_ref)
查询数据集基因。🧬
# Row names of the query cell_data_set, used here as its gene identifiers.
genes_qry <- row.names(cds_qry)
交集基因。🧬
# Genes present in both the reference and the query data set.
genes_shared <- intersect(genes_ref, genes_qry)
删除非交集基因。🧬
# Keep only the rows named in genes_shared in both objects, so reference and
# query end up with the same genes in the same order.
cds_ref <- cds_ref[genes_shared,]
cds_qry <- cds_qry[genes_shared,]
接着我们需要对上面的data用一下共同的UMI cutoff来过滤细胞。🧫
经过下面的计算,我们可以找到cutoff为1000。😬
# Smallest per-cell UMI total in each data set; the count column is named
# 'Total_mRNAs' in the reference and 'n.umi' in the query.
numi_ref <- min(colData(cds_ref)[['Total_mRNAs']])
numi_qry <- min(colData(cds_qry)[['n.umi']])

# The two minima were previously computed but never applied. Use the stricter
# (larger) of the two as the shared cutoff so both data sets are filtered at
# the same depth. NOTE(review): the accompanying text reports this value is
# 1000 for these samples -- confirm against the actual data.
umi_cutoff <- max(numi_ref, numi_qry)
cds_ref <- cds_ref[, colData(cds_ref)[['Total_mRNAs']] >= umi_cutoff]
cds_qry <- cds_qry[, colData(cds_qry)[['n.umi']] >= umi_cutoff]

# Recompute size factors on the gene- and cell-filtered objects.
cds_ref <- estimate_size_factors(cds_ref)
cds_qry <- estimate_size_factors(cds_qry)
这里我们把build_nn_index设为TRUE,来进行降维建模。😏
# Preprocess the reference with 100 dimensions, then reduce dimensions.
# build_nn_index is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
cds_ref <- preprocess_cds(cds_ref, num_dim = 100)
cds_ref <- reduce_dimension(cds_ref, build_nn_index = TRUE)
保存PCA和UMAP transform models,一会会用到。🥳
# Write the transform models held by the reference cell_data_set to the
# directory 'cds_ref_test_models' (presumably the PCA and UMAP models fitted
# by the preceding preprocessing/reduction steps -- confirm).
save_transform_models(cds_ref, 'cds_ref_test_models')
把参考transform models加入到查询数据集的cds里。😘
# Load the models stored in 'cds_ref_test_models' into the query cell_data_set.
cds_qry <- load_transform_models(cds_qry, 'cds_ref_test_models')
接着我们应用一下。😂
# Run the query through the loaded models: preprocessing transform first,
# then the dimension-reduction transform.
cds_qry <- cds_qry %>%
  preprocess_transform() %>%
  reduce_dimension_transform()
先标记一下不同的cds。😘
# Tag every cell with its origin in a 'data_set' column of the cell metadata.
colData(cds_ref)$data_set <- 'reference'
colData(cds_qry)$data_set <- 'query'
合并两个cds。🙊
# Merge reference and query into a single cell_data_set. The three flags are
# spelled TRUE rather than T, because T is an ordinary variable that can be
# reassigned.
cds_combined <- combine_cds(list(cds_ref, cds_qry),
                            keep_all_genes = TRUE,
                            cell_names_unique = TRUE,
                            keep_reduced_dims = TRUE)
# Plot the combined cells coloured by their 'data_set' column.
plot_cells(cds_combined, color_cells_by = 'data_set')
# Transfer labels from the reference column 'Main_cell_type' into the new
# query column 'cell_type_xfr', working in UMAP space with the models stored
# in 'cds_ref_test_models'.
cds_qry_lab_xfr <- transfer_cell_labels(
  cds_qry,
  reduction_method = 'UMAP',
  transform_models_dir = 'cds_ref_test_models',
  ref_coldata = colData(cds_ref),
  ref_column_name = 'Main_cell_type',
  query_column_name = 'cell_type_xfr'
)

# Derive 'cell_type_fix' from 'cell_type_xfr' (presumably filling in cells
# that received no label during the transfer -- confirm against the docs).
cds_qry_lab_fix <- fix_missing_cell_labels(
  cds_qry_lab_xfr,
  reduction_method = 'UMAP',
  from_column_name = 'cell_type_xfr',
  to_column_name = 'cell_type_fix'
)
最后祝大家早日不卷!~
点个在看吧各位~ ✐.ɴɪᴄᴇ ᴅᴀʏ 〰