以前画的箱线图比较基础,最近看到一篇关于单细胞箱线图美化的帖子,正好今天在做bulkRNA的分析,就照着这篇帖子的代码,重写了一份适合bulkRNA的箱线图美化的代码。
参考帖子:https://mp.weixin.qq.com/s/yFJvTdQ4UUJhot3vvMjfiA
美化效果:

重写适用于bulkRNA的绘图代码:
主要需要传入3个参数:exp(表达矩阵)、Group(样本分组)、related_gene(感兴趣的基因)

函数
#' Draw faceted two-group box plots for a set of genes (bulk RNA-seq)
#'
#' One panel is drawn per gene. Each panel shows a borderless filled box per
#' group, whisker error bars, a blue median segment, a white star for the
#' mean, and a Wilcoxon significance bracket between the two groups. The
#' figure is written to `output_file` and the ggplot object is returned.
#'
#' @param exp Expression matrix or data frame; rows are genes (row names are
#'   gene identifiers), columns are samples.
#' @param Group Factor or character vector with one entry per column of
#'   `exp`, giving the group of each sample. Exactly two groups must be
#'   present. If the labels are "normal" and "tumor" they are plotted in that
#'   order; otherwise the factor level order (or alphabetical order for a
#'   character vector) is used.
#' @param related_gene Character vector of genes of interest. Genes that are
#'   not row names of `exp` are skipped with a warning.
#' @param output_file Path of the figure to save; the parent directory is
#'   created when it does not exist.
#' @param fig_width,fig_height Size of the saved figure, in inches.
#' @param ncol_facet Number of gene panels per row in `facet_wrap()`.
#' @return The ggplot object, so it can be inspected or printed directly.
plot_genes_box_tianfu <- function(exp, Group, related_gene,
                                  output_file = "figure/genes_box.pdf",
                                  fig_width = 12, fig_height = 8,
                                  ncol_facet = 4) {
  # 0. Validate inputs (base R only, so bad input fails fast) -------------
  expr_df <- as.data.frame(exp)
  if (length(Group) != ncol(expr_df)) {
    stop(
      "`Group` must have one entry per column of `exp` (got ",
      length(Group), " for ", ncol(expr_df), " samples).",
      call. = FALSE
    )
  }
  if (anyDuplicated(colnames(expr_df)) > 0) {
    stop("Column (sample) names of `exp` must be unique.", call. = FALSE)
  }

  gene_found <- related_gene %in% rownames(expr_df)
  if (!any(gene_found)) {
    stop("None of `related_gene` is a row name of `exp`.", call. = FALSE)
  }
  if (!all(gene_found)) {
    warning(
      "Genes not found in `exp` and skipped: ",
      paste(related_gene[!gene_found], collapse = ", "),
      call. = FALSE
    )
  }

  # Resolve the two group labels. "normal"/"tumor" keeps its historical
  # order; any other pair of labels is taken from the data itself.
  group_chr <- as.character(Group)
  observed <- unique(group_chr[!is.na(group_chr)])
  default_levels <- c("normal", "tumor")
  if (all(observed %in% default_levels)) {
    group_levels <- default_levels
  } else if (is.factor(Group)) {
    group_levels <- levels(droplevels(Group))
  } else {
    group_levels <- sort(observed)
  }
  if (length(group_levels) != 2L || !all(group_levels %in% observed)) {
    stop(
      "`Group` must contain exactly two groups; found: ",
      paste(observed, collapse = ", "),
      call. = FALSE
    )
  }

  library(ggplot2)
  library(ggpubr)
  library(dplyr)
  library(tidyr)
  library(tibble)
  library(rstatix)

  # 1. Reshape to long format: one row per gene x sample ------------------
  df_long <- expr_df %>%
    rownames_to_column(var = "Gene") %>%
    filter(Gene %in% related_gene) %>%
    pivot_longer(
      cols = -Gene,
      names_to = "Sample",
      values_to = "Expression"
    )
  # Attach the group of each sample by matching on the sample name.
  df_long$Group <- factor(
    group_chr[match(df_long$Sample, colnames(expr_df))],
    levels = group_levels
  )

  # 2. Median per gene and group (drawn as a custom median segment) -------
  bb <- df_long %>%
    group_by(Gene, Group) %>%
    summarise(median_value = median(Expression, na.rm = TRUE),
              .groups = "drop")

  # 3. Per-gene Wilcoxon test between the two groups ----------------------
  pval_df <- df_long %>%
    group_by(Gene) %>%
    wilcox_test(Expression ~ Group) %>%
    add_significance("p") %>%
    mutate(Group1 = group_levels[1], Group2 = group_levels[2])
  # Put the bracket 5% of |max| above the per-gene maximum; using abs()
  # keeps it above the data even when the maximum is negative.
  ypos <- df_long %>%
    group_by(Gene) %>%
    summarise(top = max(Expression, na.rm = TRUE), .groups = "drop") %>%
    transmute(Gene, y.position = top + 0.05 * abs(top))
  pval_df <- left_join(pval_df, ypos, by = "Gene")

  # 4. Fill colours, applied in group-level order -------------------------
  color1 <- c("#66C2A5", "#FC8D62")

  # 5. Build the plot ------------------------------------------------------
  p <- ggplot(df_long, aes(x = Group, y = Expression)) +
    # lty = 0 hides the box outline; outliers are not drawn.
    geom_boxplot(aes(fill = Group), outlier.shape = NA, lty = 0) +
    stat_boxplot(geom = "errorbar", linewidth = 0.8, width = 0.3,
                 color = "grey40") +
    # Median segment; inherited aesthetics are disabled because `bb` has no
    # Expression column.
    geom_segment(data = bb,
                 aes(x = as.numeric(Group) - 0.25,
                     xend = as.numeric(Group) + 0.25,
                     y = median_value,
                     yend = median_value),
                 inherit.aes = FALSE,
                 color = "blue", linewidth = 0.8) +
    # Mean marker
    stat_summary(fun = mean, geom = "point", shape = 8, size = 2,
                 color = "white") +
    # Significance bracket
    stat_pvalue_manual(pval_df,
                       label = "p.signif",
                       xmin = "Group1",
                       xmax = "Group2",
                       y.position = "y.position",
                       tip.length = 0.01,
                       size = 3) +
    scale_fill_manual(values = color1) +
    facet_wrap(~ Gene, scales = "free_y", ncol = ncol_facet) +
    theme_classic() +
    theme(
      # Gene name above each panel: plain face, size 8
      strip.text = element_text(face = "plain", size = 8),
      # No background box behind the gene name
      strip.background = element_blank(),
      axis.text.y = element_text(size = 7, color = "grey20"),
      axis.text.x = element_text(size = 7, color = "grey20"),
      axis.title.y = element_text(size = 8, color = "grey20"),
      # Hide the x-axis title
      axis.title.x = element_blank(),
      axis.line = element_line(linewidth = 0.5),
      axis.ticks = element_line(linewidth = 0.5),
      # No major grid lines
      panel.grid.major = element_blank(),
      # Hide the legend
      legend.position = "none"
    )

  # 6. Save the figure, creating the output directory if needed -----------
  out_dir <- dirname(output_file)
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
  ggsave(output_file, plot = p, width = fig_width, height = fig_height)

  # Return the plot object so it can be viewed directly in R.
  p
}
# Usage example (调用示例):
# Example call. `exp`, `Group` and `related_gene` must already exist in the
# session; the figure is saved to figure/genes_box.pdf (8 x 6 inches, six
# gene panels per row) and the returned plot is shown.
p <- plot_genes_box_tianfu(
  exp = exp,
  Group = Group,
  related_gene = related_gene,
  output_file = "figure/genes_box.pdf",
  fig_width = 8,
  fig_height = 6,
  ncol_facet = 6
)
print(p)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。