为什么需要细胞通讯分析?细胞通讯在发育生物学、致癌过程和器官功能障碍中扮演关键角色。随着单细胞RNA测序技术的发展,研究人员现可探索多细胞生物体内错综复杂的通讯网络。配体-受体相互作用作为细胞通讯的基本模式,在疾病与退行性过程中至关重要。近年来,众多实验室开发了细胞通讯分析工具,如CellChat、nichenet、CellCall和ICELLNET等,为探索发育过程、肿瘤免疫微环境和潜在治疗靶点提供了新视角。我们之前分享过CellChat做细胞通讯的详细教程【单细胞转录组 | 细胞互作分析】,本文将以NicheNet为例,演示scRNA-seq细胞通讯分析。
首先,加载必要的R包并设置工作目录。本文重点演示步骤,数据我以人类外周血单细胞数据集为例:
# 设置工作目录
setwd("/mnt/data/home/user/nicheNet")
#安装
#devtools::install_github("saeyslab/nichenetr")
# 加载所需包
library(nichenetr)
library(Seurat)
library(SeuratObject)
library(tidyverse)
library(ggplot2)
# 加载PBMC数据集
seuratObj <- readRDS("/mnt/data/home/user/pbmc3k_processed.rds")
# 查看细胞类型信息
seuratObj@meta.data %>% head()
seuratObj@meta.data$seurat_annotations %>% table()
# 可视化PBMC细胞类型分布
DimPlot(seuratObj, reduction = "umap", group.by = 'seurat_annotations', label = TRUE)
NicheNet需要三个关键数据集:配体-受体网络、配体-靶基因矩阵和加权网络。
# 加载NicheNet预计算的数据库
lr_network <- readRDS("./lr_network_human_21122021.rds")
ligand_target_matrix <- readRDS("./ligand_target_matrix_nsga2r_final.rds")
weighted_networks <- readRDS("./weighted_networks_nsga2r_final.rds")
# 检查数据库信息
cat("配体-受体对数量:", nrow(lr_network), "\n")
cat("配体数量:", length(unique(lr_network$from)), "\n")
cat("受体数量:", length(unique(lr_network$to)), "\n")
cat("配体-靶基因矩阵维度:", dim(ligand_target_matrix), "\n")
在NicheNet分析中,我们需要定义哪些细胞是信号发送者(sender),哪些是接收者(receiver)。这里我们选择单核细胞作为发送者,记忆型CD4+ T细胞作为接收者。
# 定义发送细胞和接收细胞
sender_cells <- WhichCells(seuratObj, expression = seurat_annotations == "CD14+ Mono")
receiver_cells <- WhichCells(seuratObj, expression = seurat_annotations == "Memory CD4 T")
# 确认细胞数量
cat("发送细胞(CD14+ Mono)数量:", length(sender_cells), "\n")
cat("接收细胞(Memory CD4 T)数量:", length(receiver_cells), "\n")
我们需要确定发送细胞中表达的潜在配体分子,这些分子可能参与细胞间通讯。
# 获取发送细胞表达的基因
sender_expression <- GetAssayData(seuratObj, slot = "data")[, sender_cells]
sender_expression <- rowMeans(sender_expression)
# 筛选表达的配体(设置合理的表达阈值)
expressed_ligands <- names(sender_expression)[sender_expression > 0.1]
expressed_ligands <- intersect(expressed_ligands, unique(lr_network$from))
cat("单核细胞表达的配体数量:", length(expressed_ligands), "\n")
cat("前5个表达配体:", head(expressed_ligands, 5), "\n")
# 可视化表达最高的10个配体
top_ligands <- names(sort(sender_expression[expressed_ligands], decreasing = TRUE)[1:10])
VlnPlot(seuratObj, features = top_ligands,
group.by = "seurat_annotations", ncol = 5, pt.size = 0) +
ggtitle("Top 10 ligands expressed in monocytes")
为了分析配体对靶细胞的影响,我们需要确定接收细胞中的特征基因集。这里我们比较记忆CD4+ T细胞和初始CD4+ T细胞的差异基因。
# 创建一个新的分组变量
seuratObj$cd4_group <- "Other"
seuratObj$cd4_group[seuratObj$seurat_annotations == "Memory CD4 T"] <- "Memory CD4 T"
seuratObj$cd4_group[seuratObj$seurat_annotations == "Naive CD4 T"] <- "Naive CD4 T"
# 计算差异表达基因
Idents(seuratObj) <- "cd4_group"
de_results <- FindMarkers(seuratObj,
ident.1 = "Memory CD4 T",
ident.2 = "Naive CD4 T",
min.pct = 0.1,
logfc.threshold = 0.25)
# 提取差异基因
geneset_oi <- rownames(de_results)[de_results$p_val_adj < 0.05 & de_results$avg_log2FC > 0]
cat("记忆CD4+ T细胞上调的差异基因数量:", length(geneset_oi), "\n")
# 定义背景基因(在任一CD4 T细胞亚群中有表达的基因)
cd4_cells <- WhichCells(seuratObj, expression = seurat_annotations %in% c("Memory CD4 T", "Naive CD4 T"))
cd4_expression <- GetAssayData(seuratObj, slot = "data")[, cd4_cells]
background_genes <- rownames(cd4_expression)[rowSums(cd4_expression > 0) > 0]
background_genes <- setdiff(background_genes, geneset_oi)
这是NicheNet的核心步骤,它基于先验知识预测哪些配体可能对目标基因集有调控作用。
# 预测配体活性
ligand_activities <- predict_ligand_activities(
geneset = geneset_oi,
background_expressed_genes = background_genes,
potential_ligands = expressed_ligands,
ligand_target_matrix = ligand_target_matrix
)
# 提取排名前十的配体
best_ligands <- ligand_activities %>%
arrange(-aupr_corrected) %>%
top_n(10, aupr_corrected) %>%
pull(test_ligand)
cat("预测活性最高的配体:", paste(best_ligands, collapse=", "), "\n")
# 可视化配体活性
p1 <- ligand_activities %>%
arrange(-aupr_corrected) %>%
top_n(20, aupr_corrected) %>%
mutate(test_ligand = factor(test_ligand, levels = rev(test_ligand))) %>%
ggplot(aes(x = test_ligand, y = aupr_corrected)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_classic() +
labs(x = "Ligand", y = "AUPR", title = "Ligand activity prediction")
print(p1)
确定高活性配体可能调控的特定靶基因,这有助于理解配体的作用机制。
# 选择前5个活性最高的配体进行分析
best_predicting_ligands <- best_ligands[1:5]
# 检查配体和基因是否在矩阵中
valid_ligands <- intersect(best_predicting_ligands, colnames(ligand_target_matrix))
valid_genes <- intersect(geneset_oi, rownames(ligand_target_matrix))
# 如果有缺失的配体或基因,打印警告
if(length(valid_ligands) < length(best_predicting_ligands)) {
cat("警告:有", length(best_predicting_ligands) - length(valid_ligands), "个配体不在ligand_target_matrix中\n")
}
if(length(valid_genes) < length(geneset_oi)) {
cat("警告:有", length(geneset_oi) - length(valid_genes), "个基因不在ligand_target_matrix中\n")
}
# 使用有效的基因和配体提取靶基因关系
ligand_target_links <- valid_ligands %>%
lapply(function(ligand) {
targets <- ligand_target_matrix[valid_genes, ligand]
targets <- sort(targets, decreasing = TRUE)
data.frame(
ligand = ligand,
target = names(targets),
weight = targets,
stringsAsFactors = FALSE
) %>% filter(weight > 0.01) # 筛选强相互作用
}) %>% bind_rows()
# 打印前几行检查结果
head(ligand_target_links)
# 选择前10个靶基因用于可视化
top_targets_per_ligand <- ligand_target_links %>%
group_by(ligand) %>%
top_n(10, weight) %>%
ungroup()
# 绘制热图
p2 <- top_targets_per_ligand %>%
ggplot(aes(x = target, y = ligand, fill = weight)) +
geom_tile() +
scale_fill_gradient(low = "whitesmoke", high = "red") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Regulatory potential", x = "Target genes", y = "Ligands")
print(p2)
识别高活性配体在接收细胞中对应的受体,完成配体-受体相互作用网络的构建。
# 获取接收细胞中表达的受体
receiver_expression <- GetAssayData(seuratObj, slot = "data")[, receiver_cells]
receiver_expression <- rowMeans(receiver_expression)
expressed_receptors <- names(receiver_expression)[receiver_expression > 0.1]
# 查找活跃配体对应的受体
lr_network_top <- lr_network %>%
filter(from %in% best_ligands & to %in% expressed_receptors)
# 按配体-受体表达水平排序
ligand_receptor_pairs <- lr_network_top %>%
mutate(
ligand_expression = sender_expression[from],
receptor_expression = receiver_expression[to]
) %>%
filter(!is.na(ligand_expression) & !is.na(receptor_expression)) %>%
arrange(-ligand_expression * receptor_expression)
# 创建热图
p3 <- ligand_receptor_pairs %>%
head(20) %>% # 取前20对
ggplot(aes(x = to, y = from, fill = ligand_expression * receptor_expression)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red", name = "Expression product") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Ligand-receptor expression", x = "Receptors (Memory CD4 T)", y = "Ligands (CD14+ Mono)")
print(p3)
将配体-受体互作关系以网络图形式展示,直观展现细胞间通讯模式。
# 网络图可视化
library(igraph)
# 选择前15对配体-受体对
top_pairs <- ligand_receptor_pairs %>%
top_n(15, ligand_expression * receptor_expression)
# 创建清晰的边和节点数据
edges <- top_pairs[, c("from", "to")]
nodes <- data.frame(
name = unique(c(as.character(edges$from), as.character(edges$to))),
stringsAsFactors = FALSE
)
nodes$type <- ifelse(nodes$name %in% edges$from, "ligand", "receptor")
# 创建图形
g <- graph_from_data_frame(edges, vertices = nodes, directed = TRUE)
# 设置图形布局
layout <- layout_as_bipartite(g,
types = nodes$type == "ligand",
hgap = 2,
vgap = 1)
# 设置节点颜色
V(g)$color <- ifelse(nodes$type == "ligand", "lightblue", "pink")
# 设置标签颜色
V(g)$label.color <- "black"
# 绘制网络图
plot(g,
layout = layout,
vertex.size = 10,
vertex.label = V(g)$name,
vertex.label.cex = 0.8,
edge.arrow.size = 0.5,
main = "Monocyte-T cell ligand-receptor interaction network")
legend("bottomright",
legend = c("Ligand", "Receptor"),
pch = 21,
pt.bg = c("lightblue", "pink"),
pt.cex = 2,
cex = 0.8,
bty = "n")
以上为了简化数据处理过程,设置了单一的细胞类型为发送细胞和接收细胞。应用于实践的时候你可以扩展分析,将多个细胞类型定义为发送者,例如同时考虑CD14+单核细胞和FCGR3A+单核细胞:
# 定义多个发送细胞类型
sender_cells_CD14 <- WhichCells(seuratObj, expression = seurat_annotations == "CD14+ Mono")
sender_cells_FCGR3A <- WhichCells(seuratObj, expression = seurat_annotations == "FCGR3A+ Mono")
sender_cells_B <- WhichCells(seuratObj, expression = seurat_annotations == "B")
# 获取各发送细胞类型的表达特征
sender_expression_CD14 <- rowMeans(GetAssayData(seuratObj, slot = "data")[, sender_cells_CD14])
sender_expression_FCGR3A <- rowMeans(GetAssayData(seuratObj, slot = "data")[, sender_cells_FCGR3A])
sender_expression_B <- rowMeans(GetAssayData(seuratObj, slot = "data")[, sender_cells_B])
# 筛选各细胞类型表达的配体
expressed_ligands_CD14 <- names(sender_expression_CD14)[sender_expression_CD14 > 0.1]
expressed_ligands_FCGR3A <- names(sender_expression_FCGR3A)[sender_expression_FCGR3A > 0.1]
expressed_ligands_B <- names(sender_expression_B)[sender_expression_B > 0.1]
# 与已知配体数据库求交集
expressed_ligands_CD14 <- intersect(expressed_ligands_CD14, unique(lr_network$from))
expressed_ligands_FCGR3A <- intersect(expressed_ligands_FCGR3A, unique(lr_network$from))
expressed_ligands_B <- intersect(expressed_ligands_B, unique(lr_network$from))
# 比较不同发送细胞类型的配体表达
venn_data <- list(
`CD14+ Mono` = expressed_ligands_CD14,
`FCGR3A+ Mono` = expressed_ligands_FCGR3A,
`B cell` = expressed_ligands_B
)
# 如果安装了VennDiagram包,可以可视化比较
# library(VennDiagram)
# venn.plot <- venn.diagram(venn_data, filename = NULL)
# grid.draw(venn.plot)
NicheNet是分析细胞间通讯的强大工具,特别适合整合单细胞RNA测序数据。通过本教程的分析流程,我们确定了PBMC中单核细胞可能向记忆CD4+ T细胞发送的关键信号分子,以及这些信号可能影响的下游靶基因。这种分析为理解免疫细胞间的协作机制提供了重要线索,有助于研究免疫应答、炎症过程和相关疾病的分子机制。