下面这个流程是下载这个网站公开数据的方法,使用到的工具是 TCGAbiolinks(https://github.com/BioinformaticsFMRP/TCGAbiolinks)。
下载的内容主要是两种 RNA 表达谱数据(mRNA 和 miRNA)和基因突变 maf 数据,另外还包括 DNA 甲基化数据和蛋白表达量数据。
# Create a conda environment named R4 from the conda-forge channel with
# r-base, r-essentials and r-devtools (-y answers the confirmation prompt).
conda create -n R4 -c conda-forge -y r-essentials r-base r-devtools
# Switch the current shell into the new environment.
conda activate R4
# Start an R session; the commands that follow are entered at the R prompt.
R
# Install BiocManager only when it is missing, so re-running this setup does
# not reinstall it every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Both packages are installed from the BioinformaticsFMRP GitHub repositories.
# NOTE(review): "owner/repo" installs presumably rely on the `remotes` package
# being available (expected to come with r-devtools) -- confirm.
BiocManager::install("BioinformaticsFMRP/TCGAbiolinksGUI.data")
BiocManager::install("BioinformaticsFMRP/TCGAbiolinks") ## credit to the developers
批量下载代码
# Attach TCGAbiolinks and keep only the project identifiers from the GDC
# project listing; `projects` ends up holding the `project_id` column.
library(TCGAbiolinks)
projects <- getGDCprojects()[["project_id"]]
#' Download the GDC data of one project and cache each data type as an RDS file
#'
#' For the given project this queries, downloads, prepares and saves:
#' gene expression (STAR - Counts), open-access simple nucleotide variation
#' (MAF), DNA methylation beta values (Illumina 450 and 27 platforms),
#' miRNA expression and protein expression.
#'
#' Every data type is processed independently: when one of them fails (for
#' example because the project has no such data) a warning is issued and the
#' remaining data types are still attempted.
#'
#' @param x Project identifier passed to `GDCquery(project = )`,
#'   e.g. "TCGA-ACC".
#' @param dirpath Output directory. File names are built with
#'   `paste0(dirpath, x, suffix)`, so it should end with a path separator.
#'   The directory is created when it does not exist.
#' @return `NULL`, invisibly. Called for its side effects (downloaded files and
#'   the `.rds` files written to `dirpath`).
TCGA_dowload <- function(x, dirpath) {
  # saveRDS() does not create missing directories.
  if (!dir.exists(dirpath)) {
    dir.create(dirpath, recursive = TRUE)
  }

  # Run one query -> download -> prepare -> save pipeline. `...` is forwarded
  # to GDCquery(). Errors are turned into warnings so that one unavailable
  # data type does not prevent the others from being downloaded.
  fetch_and_save <- function(label, suffix, ...) {
    tryCatch(
      {
        query <- GDCquery(project = x, ...)
        # GDCprepare() reads the files fetched by GDCdownload(), so the
        # download step must always run first.
        GDCdownload(query)
        prepared <- GDCprepare(query = query)
        saveRDS(prepared, file = paste0(dirpath, x, suffix))
      },
      error = function(e) {
        warning(
          "[", x, "] ", label, " skipped: ", conditionMessage(e),
          call. = FALSE
        )
      }
    )
    invisible(NULL)
  }

  # Transcriptome (gene expression) data
  fetch_and_save(
    "gene expression", "_exp.rds",
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = "STAR - Counts"
  )

  # SNV (MAF) data
  fetch_and_save(
    "simple nucleotide variation", "_maf.rds",
    data.category = "Simple Nucleotide Variation",
    access = "open"
  )

  # Methylation data, one file per array platform
  for (i in c("450", "27")) {
    fetch_and_save(
      paste0("methylation ", i), paste0("_met_Ill", i, ".rds"),
      data.category = "DNA Methylation",
      platform = paste0("Illumina Human Methylation ", i),
      data.type = "Methylation Beta Value"
    )
  }

  # miRNA data
  fetch_and_save(
    "miRNA expression", "_miRNA.rds",
    experimental.strategy = "miRNA-Seq",
    data.category = "Transcriptome Profiling",
    data.type = "miRNA Expression Quantification"
  )

  # Protein expression data
  fetch_and_save(
    "protein expression", "_protein.rds",
    data.category = "Proteome Profiling",
    data.type = "Protein Expression Quantification"
  )

  invisible(NULL)
}
## Batch download: run the downloader once per project
for (i in projects) {
  print(i)
  # Keep going when a project fails, but report why instead of discarding the
  # error message.
  tryCatch(
    TCGA_dowload(i, dirpath = "./TCGAbiolinks_data/"),
    error = function(e) {
      message("Download failed for ", i, ": ", conditionMessage(e))
    }
  )
}
下载数据说明
# Load the saved expression object with readRDS(); make sure the file path is
# correct for where the .rds file was written.
TCGA_ACC_Exp <- readRDS("TCGA-ACC_exp.rds")
# Extract the first assay of the object.
# NOTE(review): presumably the unstranded counts, going by the variable name --
# confirm with SummarizedExperiment::assayNames(TCGA_ACC_Exp).
TCGA_ACC_Exp_unstrand <- SummarizedExperiment::assay(TCGA_ACC_Exp, 1)
# Per-sample annotation stored in the column data of the object.
TCGA_ACC_clinData <- SummarizedExperiment::colData(TCGA_ACC_Exp)
One More Thing......