关于概念,可以阅读笔者的或者其他老师的推文:https://mp.weixin.qq.com/s/pHVwjQd2Se3nKl601j1meg
rm(list = ls())
library(stringr)
library(survival)
library(survminer)
library(dcurves)
proj <- "ttt"
load('data.Rdata') # TCGA-HNSC数据
colnames(meta)
variables <- c("cluster", "gender", "neoadjuvant")
meta <- cbind(meta[,c(1:3)],
meta[,c("cluster", "gender", "neoadjuvant")])
data <- meta
data <- na.omit(data)
dim(data)
# 如果是连续的代码,需要设置data
data$OS.time <- data$OS.time
# 对变量进行数值化(0,1)
data$cluster <- as.numeric(ifelse(data$cluster=="1","0","1"))
data$gender <- as.numeric(ifelse(data$gender=="FEMALE","0","1"))
data$neoadjuvant <- as.numeric(ifelse(data$neoadjuvant=="No","0","1"))
# 数据分割 7:3,8:2 均可
# 划分是随机的,设置种子数可以让结果复现
set.seed(123)
ind <- sample(1:nrow(data), size = 0.7*nrow(data))
train <- data[ind,]
test <- data[-ind, ]
# 拟合训练集的模型
fit_train <- glm(OS~cluster + gender + neoadjuvant,data = train,family = binomial)
train$prob_train <- predict(fit_train, type="response")
dca(OS ~ prob_train,
data = train,
thresholds = seq(0, 0.6, by = 0.01),# 设定不同的阈值
label = list(model_train = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("binomial_train.pdf",width = 9,height = 7)
# 拟合验证集的模型
test$prob_test <- predict(fit_train,newdata = test,type="response")
dca(OS ~ prob_test,
data = test,
thresholds = seq(0, 0.6, by = 0.01),# 设定不同的阈值
label = list(model_train = "Test dataset")) %>%
plot(smooth = TRUE)
ggsave("binomial_test.pdf",width = 9,height = 7)
# 拟合训练集的模型
fit_train <- coxph(Surv(OS.time,OS)~cluster + gender + neoadjuvant,data = train)
# 计算12,24,36个月的概率
train$prob12 <- c(1-(summary(survfit(fit_train, newdata=train), times=12)$surv))
train$prob24 <- c(1-(summary(survfit(fit_train, newdata=train), times=24)$surv))
train$prob36 <- c(1-(summary(survfit(fit_train, newdata=train), times=36)$surv))
# 12个月训练集
dca(Surv(OS.time,OS)~prob12,
data = train,
time = 12,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob12 = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("12months_train.pdf",width = 9,height = 7)
# 24个月训练集
dca(Surv(OS.time,OS)~prob24,
data = train,
time = 24,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob24 = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("24months_train.pdf",width = 9,height = 7)
# 36个月训练集
dca(Surv(OS.time,OS)~prob36,
data = train,
time = 36,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob36 = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("36months_train.pdf",width = 9,height = 7)
分别用于评估在不同时间点(12个月、24个月和36个月)下模型的预测效能。
# 验证集
# 需要使用训练集的数据去预测test数据
# 计算12,24,36个月的概率
test$prob12 <- c(1-(summary(survfit(fit_train, newdata=test), times=12)$surv))
test$prob24 <- c(1-(summary(survfit(fit_train, newdata=test), times=24)$surv))
test$prob36 <- c(1-(summary(survfit(fit_train, newdata=test), times=36)$surv))
# 12个月验证集
dca(Surv(OS.time,OS)~prob12,
data = test,
time = 12,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob12 = "Test dataset")) %>%
plot(smooth = TRUE)
ggsave("12months_test.pdf",width = 9,height = 7)
# 24个月验证集
dca(Surv(OS.time,OS)~prob24,
data = test,
time = 24,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob24 = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("24months_test.pdf",width = 9,height = 7)
# 36个月验证集
dca(Surv(OS.time,OS)~prob36,
data = test,
time = 36,
thresholds = seq(0, 0.60, by = 0.01),
label = list(prob36 = "Train dataset")) %>%
plot(smooth = TRUE)
ggsave("36months_test.pdf",width = 9,height = 7)
注:若对内容有疑惑或者有发现明确错误的朋友,请联系后台(欢迎交流)。更多内容可关注公众号:生信方舟
- END -
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。