我正试图模拟三个不同比例的接受治疗的病人的十年死亡风险。我已经用了十年的时间,每年都这么做,结果是一段相当长的代码。我想要的是在十年的时间内将其转换为每月一次,并且为了避免数百行代码,我想使用for循环。
我的数据是这样的
set.seed(1234)
N <- 750000
id <- c(1:N)
###creates a sex variable for men and appends women
treated <- rep.int(0,125000)
treated <- append(treated, rep.int(1,125000))
treated <- append(treated, rep.int(0,100000))
treated <- append(treated, rep.int(1,150000))
treated <- append(treated, rep.int(0,75000))
treated <- append(treated, rep.int(1,175000))
groupname <- rep.int(1,250000)
groupname <- c(groupname, rep.int(2,250000))
groupname <- c(groupname, rep.int(3,250000))
从性别和身份向量创建数据
data = data.frame(treated, id, groupname)
class(data$treated)
data$treated <- factor(data$treated, levels = c(0,1), labels = c("untreated","treated"))
data$groupname <- factor(data$groupname, levels = c(1,2,3), labels = c("group 1", "group 2", "group 3"))
然后生成每一个"wave",在这样的十年中(基本相同的代码,只是为每个波形分配了一个新的列名):
data$year_0 <- 1
data$year_1 <- ifelse(data$treated=="treated",rbinom(N, 1, 1-0.035/4), rbinom(N, 1, 1-0.05/4))
data$year_2 <- ifelse(data$treated=="treated",
ifelse(data$year_1 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_1 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_3 <- ifelse(data$treated=="treated",
ifelse(data$year_2 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_2 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_4 <- ifelse(data$treated=="treated",
ifelse(data$year_3 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_3 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_5 <- ifelse(data$treated=="treated",
ifelse(data$year_4 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_4 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_6 <- ifelse(data$treated=="treated",
ifelse(data$year_5 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_5 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_7 <- ifelse(data$treated=="treated",
ifelse(data$year_6 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_6 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_8 <- ifelse(data$treated=="treated",
ifelse(data$year_7 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_7 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_9 <- ifelse(data$treated=="treated",
ifelse(data$year_8 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_8 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
data$year_10 <- ifelse(data$treated=="treated",
ifelse(data$year_9 =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_9 =="0", 0, rbinom(N, 1, 1-0.05/4))
)
###converts to long format
data_long <- reshape(data, direction="long", varying= c(list(4:14)), sep = "_",
idvar="id", timevar=c("year"))
class(data_long$year)
data_long$year <- as.numeric(data_long$year)
data_long$year <- data_long$year -1
我想使用for循环来完成这个任务,这样我就可以模拟120个月我编写了这段代码
for (i in 1:10){ n <- ifelse(data$treated=="treated",
ifelse(data$year_[(i-1)] =="0", 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data$year_[(i-1)] =="0",
0, rbinom(N, 1, 1-0.05/4))
)
data$year_[i] <- n
}
##1: I data$year_[i] <- n :
##error number of items to replace is not a multiple of replacement length
据我所知,此错误表明编码for循环的方式返回长度不兼容的数据。通常我可以通过google进行故障排除,但是当代码不在for循环时运行时,我不知道问题出在哪里。我认为,错误可能是将inot解释为一个字符串,可以用于命名列,但使用粘贴只是在前面提到的警告之外产生了这个警告。
##Fejl i `$<-.data.frame`(`*tmp*`, "year_", value = c(NA, NA, NA, NA, :
##replacement has 750001 rows, data has 750000
谷歌在这个问题上的结果似乎并没有把这个问题说成是一个问题。所以现在的问题是,我还不知道问题出在哪里。
发布于 2017-04-28 16:53:42
考虑使用列名的括号中的引用[[...]]
来传递具有paste0()
和条件的字符串,用于第一年,然后是所有其他年份:
data$year_0 <- 1
for (i in 1:10){
if (i == 1){
n <- ifelse(data$treated=="treated", rbinom(N, 1, 1-0.035/4), rbinom(N, 1, 1-0.05/4))
}
else {
n <- ifelse(data$treated=="treated",
ifelse(data[[paste0("year_", i-1)]] == 0, 0, rbinom(N, 1, 1-0.035/4)),
ifelse(data[[paste0("year_", i-1)]] == 0, 0, rbinom(N, 1, 1-0.05/4))
)
}
data[[paste0("year_", i)]] <- n
}
发布于 2017-04-28 14:38:10
您可以将列year_i
再分配到一个额外的矩阵中。然后,您可以使用更高级的cbind()
逐列扩展矩阵:
set.seed(1234)
N <- 750000
data = data.frame(treated=rep(c(0,1,0,1,0,1), c(125000, 125000, 100000, 150000, 75000, 175000)), id=1:N,
groupname=rep(1:3, each=250000))
data$treated <- factor(data$treated, levels = c(0,1), labels = c("untreated","treated"))
data$groupname <- factor(data$groupname, levels = c(1,2,3), labels = c("group 1", "group 2", "group 3"))
Year <- matrix(1, N, 1) # data$year_0 <- 1
Year <- cbind(Year, ifelse(data$treated=="treated",rbinom(N, 1, 1-0.035/4), rbinom(N, 1, 1-0.05/4))) # data$year_1
for (i in 2:10) {
lastcol <- Year[,ncol(Year)]
Year <- cbind(Year,
ifelse(data$treated=="treated",
ifelse(lastcol==0, 0, rbinom(N, 1, 1-0.035/4)),
ifelse(lastcol==0, 0, rbinom(N, 1, 1-0.05/4)))
)
}
您可以通过预分配来加快一些速度(但大部分是抽样):
set.seed(1234)
K <- 10 # year_0 ... year_K
Year <- matrix(NA, N, K+1)
Year[,1] <- 1 # year_0
Year[,2] <- ifelse(data$treated=="treated", rbinom(N, 1, 1-0.035/4), rbinom(N, 1, 1-0.05/4)) # data$year_1
for (i in 3:(K+1)) Year[,i] <- ifelse(data$treated=="treated",
ifelse(Year[,i-1]==0, 0, rbinom(N, 1, 1-0.035/4)),
ifelse(Year[,i-1]==0, 0, rbinom(N, 1, 1-0.05/4)))
如果需要,可以将数据和矩阵Year
放在一起。如果是这样的话,最好将列名分配给矩阵:
colnames(Year) <- paste0("year_", 0:K)
https://stackoverflow.com/questions/43682355
复制相似问题