最近处理了几个免疫治疗前后的数据集,这种数据集还是比较少,看起来还是黑色素瘤和非小细胞肺癌相关的数据集比较多,还浅看了一下PD-1/PD-L1±CTLA-4的疗效。

还是继续研究上次的TNBC数据集吧,看看复旦分型?
18例患者有肿瘤活检样本,一半是atezo+paclitaxel组,一半是paclitaxel化疗组,2例患者在疾病进展后有肿瘤样本。Responder(R;n=9,包括CR/PR样本)或Non-responder(NR;n = 12,包括SD/PD样本),在联合治疗组获得了4例PR和7例SD,而在化疗组获得了5例PR,3例SD和2例PD, 1例not available。 这么看来,同样的11例患者,化疗组疗效更好啊,也不论PD-L1的表达。不过联合治疗组DCR 100%了

首先去附件下载临床信息文件,信息非常齐全,我想要的就是疗效信息和治疗分组。 当然,白嫖这里的细胞分群 这个表格命名为meta

这个表格命名为meta2,需要的是PD-L1信息,当然TILs也可以看一看

rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
getwd()
setwd('3-cell/')
sce.all=readRDS( "sce.all_celltype.rds")
sce.all
phe = sce.all@meta.data
table(phe$treat)
table(phe$orig.ident)
table(phe$patient)
table(phe$type)
colnames(phe)
##看看tumor里的,排除blood
##只留下给药和进展后的样本
sce5 = sce.all[, sce.all$treat != c( 'Pre' )]
sce6 = sce5[, sce5$type == c( 't' )]
table(sce6$celltype)
phe_sce6 = sce6@meta.data
library("readxl")
meta<- read_excel("../meta.xlsx")
meta2<- read_excel("../meta2.xlsx")
##可以看到总共22位患者,11位是atezo+paclitaxel,11位是paclitaxel
#其中联合治疗组有5位是PD-L1+,atezo应该使用IC≥1%来划定的cutoff,其中两位是SD,3位是PR
meta2 = meta2[-1,]
meta_tumor = meta[meta$`Cell barcode` %in% rownames(phe_sce6),]
table(meta_tumor$Treatment)
table(meta_tumor$Efficacy)
table(meta_tumor$Group)
p = identical(meta_tumor$`Cell barcode`,rownames(phe_sce6));p
if(!p) meta_tumor = meta_tumor[match(rownames(phe_sce6),meta_tumor$`Cell barcode`),]
table(meta_tumor$Cluster)
table(meta_tumor$`Major celltype`)
table(phe_sce6$celltype)
colnames(meta_tumor)
table(meta_tumor$Treatment)
table(meta_tumor$Group)
sce6@meta.data = meta_tumor
可以看到,原来做的复现分群和原文的分群还是有些许差异的

rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
getwd()
setwd('3-cell/')
sce.all=readRDS( "sce_meta.rds")
sce.all
sce = sce.all
table(Idents(sce))
table(sce$Patient)
head(sce@meta.data)
phe = sce@meta.data
sce@meta.data = meta_tumor
colnames(phe)
p1=DimPlot(sce,reduction = "umap",label=T,group.by = 'Patient')
mycolors <-c( '#B53E2B', '#53A85F', '#F1BB72','#D6E7A3', '#F3B1A0',
'#E95C59', '#E59CC4', '#AB3282', '#BD956A',
'#9FA3A8', '#E0D4CA', '#C5DEBA', '#F7F398',
'#C1E6F3', '#6778AE', '#91D0BE',
'#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
'#968175')
library(harmony)
p2=DimPlot(sce,reduction = "umap",label=T ,group.by = "Efficacy",cols = mycolors)
p2
colnames(phe)
#table(phe$treat)
table(phe$patient)
table(phe$Efficacy)
#table(phe$type)
#table(phe$Sample)
#table(phe$Patient)
#table(phe$Origin)
table(phe$Tissue)
table(phe$Group)
table(phe$Treatment)
table(phe$`Major celltype`)
table(phe$Cluster)
p3=DimPlot(sce,reduction = "umap",label=T ,group.by = "Tissue",cols = mycolors)
p3
p4=DimPlot(sce,reduction = "umap",label=T ,group.by = "Group",cols = mycolors)
p4
p5=DimPlot(sce,reduction = "umap",label=T ,group.by = "Treatment",cols = mycolors)
p5
phe = sce@meta.data
colnames(phe)
colnames(phe)[34] = "Major_celltype"
sce@meta.data = phe
p6=DimPlot(sce,reduction = "umap",label=T ,group.by = "Major_celltype",cols = mycolors,label.box = T)
p6
library(patchwork)
(p1+p2+p3)/(p4+p5+p6)
看起来019这个患者很奇特(右下角这一群细胞),这个患者是PR

单独看一下019这个患者的细胞亚群

如果按照PR和non-PR(PD+SD)来区分的话,PR中有41246个细胞,non-PR中有34820个细胞。乍一看,PR中好像每种细胞类型都多一些?

用比例图看一下
##堆积柱状图
library(tidyr)
library(reshape2)
colnames(phe)
tb=table(phe$Major_celltype,
phe$Efficacy_PR)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
col =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
"#885649","#DD76C5","#7F7F7F","#BBBE00","#41BED1")
library(ggthemes)
g1 = ggplot(bar_per, aes(x = percent, y = Var1)) +
geom_bar(aes(fill = Var2) , stat = "identity") +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = "cell type", fill = NULL)+labs(x = 'Fraction of cells')+
scale_fill_manual(values=col)+
labs(title = " ")+
theme_tufte()+
theme(plot.title = element_text(size=12,hjust=0.5))
###2
##堆积柱状图
library(tidyr)
library(reshape2)
colnames(phe)
tb=table(phe$Major_celltype,
phe$patient)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
#write.csv(bar_per,file = "celltype_by_group_percent.csv")
col =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
"#885649","#DD76C5","#7F7F7F","#BBBE00","#41BED1")
#使用colorRampPalette()扩展R包配色方案中的颜色
col4<-colorRampPalette(col)(14)
library(ggsci)
col5<-colorRampPalette((pal_npg("nrc")(9)))(14)
library(ggthemes)
g2 = ggplot(bar_per, aes(x = percent, y = Var1)) +
geom_bar(aes(fill = Var2) , stat = "identity") +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = "cell type", fill = NULL)+labs(x = 'Fraction of cells')+
scale_fill_manual(values=col5)+
labs(title = " ")+
theme_tufte()+
theme(plot.title = element_text(size=12,hjust=0.5))
library(patchwork)
g1+g2
PR的患者中T细胞竟然比non-PR患者中少很多? 右边深蓝色的P013患者髓系和innate lymphoid cells (ILCs)占比很多,这位患者也是PR,使用chemo治疗的。 猜:联合治疗和单独chemo治疗后,免疫微环境中细胞亚群变化的差异,导致不同的机制PR。比如单看atezo+chemo治疗后的P019这位患者,可能就得从B细胞入手?

随后我去翻了一下原文,应该能小小的印证猜想

复旦分型在这里的表达情况 不过这应该是蛋白层面的,我们浅看一下
cell_marker <- c("CD8A","AR","DCLK1","FOXC1")
FeaturePlot(sce,features = cell_marker,cols = c("lightgrey" ,"#DE1F1F"),ncol=2)

也看看联合其他潜在靶点,可能的情况咋样…
#CD39和CD73
cell_marker <- c("ENTPD1","NT5E")
FeaturePlot(sce,features = cell_marker,cols = c("lightgrey" ,"#DE1F1F"),ncol=2)

#"CTLA4","TIGHT","TIM3"('HAVCR2'),"LAG3",'PD1'
cell_marker <- c("CTLA4","PDCD1","HAVCR2","LAG3")
FeaturePlot(sce,features = cell_marker,cols = c("lightgrey" ,"#DE1F1F"),ncol=2)

cell_marker <- c("TIGIT",'PDCD1','LILRB1','BTLA','CD47','VTCN1','CD276')
FeaturePlot(sce,features = cell_marker,cols = c("lightgrey" ,"#DE1F1F"),ncol=4)

再来看看这个图,髓系和B,CD39,CD73和TIM3

接下来是第二阶段,基因与疗效的相关性 (临床信息中只有联合治疗组PD-L1的蛋白表达信息,11人,汇总如下) 那其实PD-L1阳性也不一定拉的开差距,PD-L1的IC评分….咦。

exprSet <- sce@assays[["RNA"]]@data
exprSet = as.data.frame(exprSet)
#exprSet<-as.data.frame(t(exprSet)) #转置
##好像没啥意义啊
a = exprSet[str_detect(rownames(exprSet),'ENTPD1'),]
rownames(a)
b = as.data.frame(t(a))
p = identical(rownames(b),rownames(phe));p
phe$ENTPD1 = b$ENTPD1
phe$ENTPD1_group <- ifelse(phe$ENTPD1 > median(phe$ENTPD1) ,"ENTPD1 High","ENTPD1 Low")
table(phe$ENTPD1_group)
clinical=phe
colnames(clinical)
##堆积柱状图
library(tidyr)
library(reshape2)
colnames(clinical)
tb=table(clinical$ENTPD1_group,
clinical$Efficacy_PR)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
#write.csv(bar_per,file = "celltype_by_group_percent.csv")
col =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
"#885649","#DD76C5","#7F7F7F","#BBBE00","#41BED1")
p2 = ggplot(bar_per, aes(x = percent, y = Var1)) +
geom_bar(aes(fill = Var2) , stat = "identity") + coord_flip() +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = " ", fill = NULL)+labs(x = 'Fraction of patients')+
scale_fill_manual(values=col)+
labs(title = "ENTPD1")+
theme_classic()+
theme(plot.title = element_text(size=12,hjust=0.5))
p2
p1+p2

免疫细胞上表达的ENTPD1在PR的患者中挺高?不知道是否有实际意义。 去原文找了下思路

之前是我思维固化之前想岔了🤨,一直拿治疗后的数据在处理。其实应该看治疗前和治疗后的变化。就算是找有影响的marker,也应该是基线期的状态。 那咱们重新开始。刚开始处理数据的时候就不要删掉pre,然后又8G的数据了
##懒得写循环
#后果就是复制粘贴复制粘贴,再多来几个细胞亚群就不能懒了
#没有懒得写注释已经挺好了
#可能因为我是网上e人,现实i人
library(ggsci)
col5<-colorRampPalette((pal_npg("nrc")(9)))(14)
mycolors <-c( '#B53E2B', '#53A85F', '#F1BB72','#D6E7A3', '#F3B1A0',
'#E59CC4', '#AB3282', '#BD956A',
'#9FA3A8', '#E0D4CA', '#F7F398',
'#C1E6F3', '#6778AE', '#91D0BE',
'#DCC1DD', '#625D9E', '#68A180', '#3A6963'
)
table(sce$Major_celltype)
sce_T = sce[, sce$Major_celltype == c( 'T cell' )]
DimPlot(sce_T, reduction = "umap", group.by = "Cluster",label = F,raster=FALSE,cols = mycolors,split.by = 'Group')

g = function(sce){##堆积柱状图
library(tidyr)
library(reshape2)
phe = sce@meta.data
colnames(phe)
tb=table(phe$Group,
phe$Cluster)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
library(ggthemes)
g1 = ggplot(bar_per, aes(x = percent, y = Var1)) +
geom_bar(aes(fill = Var2) , stat = "identity") +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = "cell type", fill = NULL)+labs(x = 'Fraction of cells')+
scale_fill_manual(values=mycolors)+
labs(title = " ")+
theme_tufte()+
theme(plot.title = element_text(size=12,hjust=0.5))
g1 + coord_flip()
}
g(sce_T)
治疗后相比于治疗前CD8-CXCL13变少,CD8Tem-GZMK变多 那又得分免疫联合治疗组和化疗组的情况…

table(sce$Treatment)
sce_T_com = sce_T[, sce_T$Treatment == c( 'Anti-PD-L1+Chemo' )]
g(sce_T_com)
看起来在联合治疗组,并没有太大这个趋势啊…

于是我们再来看看根据R和NR分组之后
phe_T_com = sce_T_com@meta.data
table(phe_T_com$Group)
table(phe_T_com$Efficacy_PR)
phe_T_com = phe_T_com[phe_T_com$Group != 'Progression',]
phe_T_com$outcom = NA
phe_T_com$outcom <- with(phe_T_com, ifelse(Group == "Pre-treatment" & Efficacy_PR == "PR", "pre_R",
ifelse(Group == "Pre-treatment" & Efficacy_PR == "non-PR", "pre_NR",
ifelse(Group == "Post-treatment" & Efficacy_PR == "non-PR", "post_NR","post_R"))))
table(phe_T_com$outcom)
table(phe_T_com$Cluster)
phe_T_com$CD8CXCL13fraction = phe_T_com$ t_CD8-CXCL13
colnames(phe_T_com)
table(phe_T_com$patient)
library(dplyr)
result <- phe_T_com %>%
group_by(patient, outcom) %>%
summarize(B_percentage = mean(Cluster == "t_CD8-CXCL13")) %>%
ungroup()

col = c("#508E42", "#DF5E38","#7B7DC4",'yellow')
P_T <- ggplot(result,aes(x=outcom,y=percentage,fill=patient)) +
geom_boxplot(size=0.5,fill="white",outlier.fill="white",outlier.color="white",color=col) +
geom_jitter(aes(fill=outcom),width =0.2,shape = 21,size=2) +
scale_fill_manual(values = col)+
theme_bw() +
theme(panel.grid = element_blank(), panel.background = element_blank(),
axis.line = element_line(color = 'black')) +
ylab("Fraction") +xlab("CD8-CXCL13")+
theme(legend.position = "none",
axis.text.x = element_text(colour = "black",angle=45, hjust = 1, family = "Times", size = 16),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 16, face = "plain"),
axis.title.x = element_text(family = "Times", size = 16, face = "plain"),
plot.title = element_text(family = "Times", size = 17, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
P_T
在联合治疗组,治疗后R的患者相较于治疗前CD8-CXCL13增多,且有显著性。但是前面看比例图的时候好像并不是很明显…这里是根据患者分组 治疗后R的患者比NR的患者CD8-CXCL13增多。

B细胞亚群也是一样 这个图大概看一下整体的情况,
sce_B = sce[, sce$Major_celltype == c( 'B cell' )]
DimPlot(sce_B, reduction = "umap", group.by = "Cluster",label = T,raster=FALSE,cols = col4,split.by = 'Group')

再看看比例图
g(sce_B)

由于我是漫无目的探索,这个数据集就先这样了,下次想起什么新的东西,再翻出来探索。