前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >TCGA数据整理-3

TCGA数据整理-3

作者头像
生信菜鸟团
发布2024-07-11 10:56:26
700
发布2024-07-11 10:56:26
举报
文章被收录于专栏:生信菜鸟团

另一个数据集的整理

GSE162550

下载这两个文件

建立工作目录

rm(list = ls())proj = "DHA"#1.获取表达矩阵dat = data.table::fread("GSE162550_gene_sample_count_with_symbol.xls.gz", data.table = F)# 保留ensemblid ,行名转换exp = as.matrix(dat[,4:9])rownames(exp) = dat[,1] library(tinyarray)exp = trans_exp_new(exp)# 分组信息获取colnames(exp)Group = rep(c("DMSO","DHA"),each = 3)Group = factor(Group,levels = c("DMSO","DHA")) exp = exp[apply(exp, 1, function(x) sum(x > 0) > 0.5*ncol(exp)), ]nrow(exp)# 临床信息获取library(GEOquery)eSet = getGEO("GSE162550",destdir = ".",getGPL = F)eSet = eSet[[1]]clinical = pData(eSet)# 顺便看下表达矩阵,空的dim(exprs(eSet))save(exp,Group,proj,clinical,file = paste0(proj,".Rdata"))

差异分析

三种差异分析函数比较:

输入数据都是count矩阵和对应的分组信息。

---title: "三大R包差异分析"output: html_documenteditor_options: chunk_output_type: console--- ```{r setup, include=FALSE}knitr::opts_chunk$set( collapse = TRUE, comment = "#>")knitr::opts_chunk$set(fig.width = 8,fig.height = 6,collapse = TRUE)knitr::opts_chunk$set(message = FALSE,warning = FALSE)``` ### 1.三大R包差异分析 ```{r}rm(list = ls())load("TCGA-CHOL.Rdata")table(Group)#deseq2----library(DESeq2)colData <- data.frame(row.names =colnames(exp), condition=Group)if(!file.exists(paste0(proj,"_dd.Rdata"))){ dds <- DESeqDataSetFromMatrix( countData = exp, colData = colData, design = ~ condition) dds <- DESeq(dds) save(dds,file = paste0(proj,"_dd.Rdata"))}load(file = paste0(proj,"_dd.Rdata"))class(dds)res <- results(dds, contrast = c("condition",rev(levels(Group))))#constrastc("condition",rev(levels(Group)))class(res)DEG1 <- as.data.frame(res)library(dplyr)DEG1 <- arrange(DEG1,pvalue)DEG1 = na.omit(DEG1)head(DEG1) #添加change列标记基因上调下调logFC_t = 2pvalue_t = 0.05 k1 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange < -logFC_t);table(k1)k2 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange > logFC_t);table(k2)DEG1$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))table(DEG1$change)head(DEG1) #edgeR----library(edgeR) dge <- DGEList(counts=exp,group=Group)dge$samples$lib.size <- colSums(dge$counts)dge <- calcNormFactors(dge) design <- model.matrix(~Group) dge <- estimateGLMCommonDisp(dge, design)dge <- estimateGLMTrendedDisp(dge, design)dge <- estimateGLMTagwiseDisp(dge, design) fit <- glmFit(dge, design)fit <- glmLRT(fit) DEG2=topTags(fit, n=Inf)class(DEG2)DEG2=as.data.frame(DEG2)head(DEG2) k1 = (DEG2$PValue < pvalue_t)&(DEG2$logFC < -logFC_t)k2 = (DEG2$PValue < pvalue_t)&(DEG2$logFC > logFC_t)DEG2$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT")) head(DEG2)table(DEG2$change)###limma----library(limma)dge <- edgeR::DGEList(counts=exp)dge <- edgeR::calcNormFactors(dge)design <- model.matrix(~Group)v <- voom(dge,design, normalize="quantile") fit <- lmFit(v, design)fit= eBayes(fit) DEG3 = topTable(fit, coef=2, n=Inf)DEG3 = na.omit(DEG3) k1 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC < -logFC_t)k2 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC > logFC_t)DEG3$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))table(DEG3$change)head(DEG3) tj = data.frame(deseq2 = as.integer(table(DEG1$change)), edgeR = as.integer(table(DEG2$change)), limma_voom = as.integer(table(DEG3$change)), row.names = c("down","not","up") );tjsave(DEG1,DEG2,DEG3,Group,tj,file = paste0(proj,"_DEG.Rdata"))``` ### 2.可视化 ```{r}library(ggplot2)library(tinyarray)exp[1:4,1:4]# cpm 去除文库大小的影响,count直接比较是不公平的dat = log2(cpm(exp)+1)pca.plot = draw_pca(dat,Group);pca.plot save(pca.plot,file = paste0(proj,"_pcaplot.Rdata")) cg1 = rownames(DEG1)[DEG1$change !="NOT"]cg2 = rownames(DEG2)[DEG2$change !="NOT"]cg3 = rownames(DEG3)[DEG3$change !="NOT"] h1 = draw_heatmap(dat[cg1,],Group)h2 = draw_heatmap(dat[cg2,],Group)h3 = draw_heatmap(dat[cg3,],Group) v1 = draw_volcano(DEG1,pkg = 1,logFC_cutoff = logFC_t)v2 = draw_volcano(DEG2,pkg = 2,logFC_cutoff = logFC_t)v3 = draw_volcano(DEG3,pkg = 3,logFC_cutoff = logFC_t) library(patchwork)(h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none") ggsave(paste0(proj,"_heat_vo.png"),width = 15,height = 10)``` ### 3.三大R包差异基因对比 ```{r}UP=function(df){ rownames(df)[df$change=="UP"]}DOWN=function(df){ rownames(df)[df$change=="DOWN"]} up = intersect(intersect(UP(DEG1),UP(DEG2)),UP(DEG3))down = intersect(intersect(DOWN(DEG1),DOWN(DEG2)),DOWN(DEG3))dat = log2(cpm(exp)+1)hp = draw_heatmap(dat[c(up,down),],Group) #上调、下调基因分别画维恩图up_genes = list(Deseq2 = UP(DEG1), edgeR = UP(DEG2), limma = UP(DEG3)) down_genes = list(Deseq2 = DOWN(DEG1), edgeR = DOWN(DEG2), limma = DOWN(DEG3)) up.plot <- draw_venn(up_genes,"UPgene")down.plot <- draw_venn(down_genes,"DOWNgene") #维恩图拼图,终于搞定 library(patchwork)#up.plot + down.plot# 拼图pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")ggsave(paste0(proj,"_heat_ve_pca.png"),width = 15,height = 10)``` ![](TCGA-CHOL_heat_ve_pca.png) 分组聚类的热图 ```{r}library(ComplexHeatmap)library(circlize)col_fun = colorRamp2(c(-2, 0, 2), c("#2fa1dd", "white", "#f87669"))top_annotation = HeatmapAnnotation( cluster = anno_block(gp = gpar(fill = c("#f87669","#2fa1dd")), labels = levels(Group), labels_gp = gpar(col = "white", fontsize = 12))) m = Heatmap(t(scale(t(exp[c(up,down),]))),name = " ", col = col_fun, top_annotation = top_annotation, column_split = Group, show_heatmap_legend = T, border = F, show_column_names = F, show_row_names = F, use_raster = F, cluster_column_slices = F, column_title = NULL)m```

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2024-07-11,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 生信菜鸟团 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档