网络药理学中,我们有时并不只关注某个复方或某味中药的靶点,而只需要关注某种单体的潜在靶点。这就带来了一个问题:基于单一或者少数几个数据库(或途径)得到的单体靶点十分有限,不足以满足后续的分析需求。
正好最近做了一个生信需求,需要获取“芍药苷”的潜在靶点。这里,我总结了8种获取单体靶点的方式,以尽可能全面地获取其靶点。
#CID编号
CID: 442534
#分子式
MF: C23H28O11
#摩尔质量
MW: 480.5 g/mol
#smiles
C[C@]12C[C@@]3([C@@H]4C[C@]1([C@@]4([C@H](O2)O3)COC(=O)C5=CC=CC=C5)O[C@H]6[C@@H]([C@H]([C@@H]([C@H](O6)CO)O)O)O)O
# ---- Session setup ----
# NOTE(review): rm(list = ls()) wipes every object in the calling workspace.
# It is kept to preserve the script's behaviour; consider running the script
# in a fresh R session instead.
rm(list = ls())
options(stringsAsFactors = FALSE)
library(readxl)
library(RColorBrewer)
library(data.table)
library(venn)
# Personal helper functions. Use forward slashes only: inside an R string a
# backslash followed by "t" is the TAB escape, not a path separator, so a
# path written as "/Users\tianfu/..." can never be found.
source("/Users/tianfu/Documents/database/RFunction/my_fuction_wangyao.R")
# ---- TCMSP ----
# Load the TCMSP target table. The path uses a forward slash: "\t" inside an
# R string is the TAB escape, so "table\target.csv" would point at a file
# whose name contains a tab character.
TCMSP_target_df <- data.table::fread("table/target.csv", data.table = FALSE)
dim(TCMSP_target_df) # author's run: 1489 rows x 12 columns
colnames(TCMSP_target_df)
# Keep only the rows of molecule MOL001924 (presumably the TCMSP ID of
# paeoniflorin - confirm). %in% never yields NA, so rows with a missing
# MOL_ID are dropped instead of turning into all-NA rows.
TCMSP_target_df <- TCMSP_target_df[TCMSP_target_df$MOL_ID %in% "MOL001924", ]
dim(TCMSP_target_df) # author's run: 4 rows x 12 columns
write.csv(TCMSP_target_df, file = "table1/MOL001924_TCMSP.csv")
# A "Gene Names" cell may hold several space-separated symbols; split every
# cell in one vectorised call instead of growing a vector inside a loop.
TCMSP_target <- unique(unlist(strsplit(TCMSP_target_df$`Gene Names`, " ")))
length(TCMSP_target) # author's run: 7
# One symbol per line, without header, row names or quoting.
write.table(TCMSP_target,
  file = "table1/MOL001924_TCMSP_target.csv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- Hit2 ----
# Read the exported target sheet and keep each value of its Symbol column once.
Hit2_target <- unique(read_excel("table/C0705-target.xlsx")$Symbol)
length(Hit2_target) # author's run: 20
# One symbol per line, without header, row names or quoting.
write.table(
  Hit2_target,
  file = "table1/C0705_Hit_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- Herb ----
# Two exports are combined: the paper-derived targets and the ingredient
# targets. Both expose the target in a "Target name" column.
Herb_target_df1 <- read_excel("table/herb/drug_paper_target2025_5_15.xlsx")
Herb_target_df2 <- read_excel("table/herb/ingredient_target2025_5_15.xlsx")
tmp1 <- Herb_target_df1$`Target name`
tmp2 <- Herb_target_df2$`Target name`
length(tmp1) # author's run: 13
length(tmp2) # author's run: 4
# Concatenate both sources and drop duplicates.
Herb_target <- unique(c(tmp1, tmp2))
length(Herb_target) # author's run: 16
write.table(
  Herb_target,
  file = "table1/HBIN038606_Herb_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- SwissTargetPrediction ----
Swiss_target_df <- data.table::fread("table/SwissTargetPrediction.csv",
  data.table = FALSE
)
dim(Swiss_target_df) # author's run: 100 rows x 7 columns
# Keep predictions whose "Probability*" value exceeds 0.1.
Swiss_target_df <- Swiss_target_df[Swiss_target_df$`Probability*` > 0.1, ]
dim(Swiss_target_df) # author's run: 24 rows x 7 columns
Swiss_target <- unique(Swiss_target_df$`Common name`)
# A "Common name" cell may list several space-separated symbols. De-duplicate
# again AFTER splitting: a symbol that appears both alone and inside a
# multi-symbol cell would otherwise be written twice.
Swiss_target_split <- unique(unlist(strsplit(Swiss_target, " ")))
length(Swiss_target_split) # author's run: 24
write.table(Swiss_target_split,
  file = "table1/Swiss_target.csv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- SuperPred ----
SuperPred_df <- data.table::fread("table/SuperPred_Targets.csv",
  data.table = FALSE
)
dim(SuperPred_df) # author's run: 79 rows x 7 columns
colnames(SuperPred_df)
# Strip the "%" sign and rescale to fractions so the columns can be compared
# numerically (assumes values look like "85.3%" - confirm against the export).
SuperPred_df$Probability_num <- as.numeric(gsub("%", "", SuperPred_df$Probability)) / 100
SuperPred_df$ModelAcc_num <- as.numeric(gsub("%", "", SuperPred_df$`Model accuracy`)) / 100
# Inspect the ranges on the numeric columns; range() on the raw columns would
# compare them as text if they are stored as strings.
range(SuperPred_df$Probability_num)
range(SuperPred_df$ModelAcc_num)
dim(SuperPred_df) # two helper columns were added, so two more than the raw table
# Keep predictions with Probability >= 0.6 and Model accuracy >= 0.9.
# which() drops rows where either value failed to parse, instead of
# producing all-NA rows.
SuperPred_filtered_df <- SuperPred_df[
  which(SuperPred_df$Probability_num >= 0.6 &
    SuperPred_df$ModelAcc_num >= 0.9),
]
# Map UniProt accessions to gene symbols through the reviewed UniProtKB table.
SuperPred_filtered_df$Entry <- SuperPred_filtered_df$`UniProt ID`
UniProt <- data.table::fread("~/Documents/database/uniprot/uniprotkb_AND_reviewed_true_AND_model_o_2024_09_23.tsv",
  data.table = FALSE
)
colnames(UniProt)
SuperPred_filtered_df <- merge(SuperPred_filtered_df, UniProt[, c("Entry", "Gene Names")],
  by = "Entry", all.x = TRUE
)
# all.x = TRUE keeps accessions that have no match in the UniProt table, with
# NA gene names. Drop them, as the PharmMapper section does, so that NA does
# not end up in the target list.
SuperPred_filtered_df <- SuperPred_filtered_df[!is.na(SuperPred_filtered_df$`Gene Names`), ]
# Split the space-separated gene names and de-duplicate.
SuperPred_target <- unique(unlist(strsplit(SuperPred_filtered_df$`Gene Names`, " ")))
length(SuperPred_target) # author's run reported 102 (before NA entries were removed)
write.table(SuperPred_target,
  file = "table1/SuperPred_target.csv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- PharmMapper ----
Pharm_df <- data.table::fread(
  "table/pharmmapper_250515091848.csv",
  data.table = FALSE
)
# Two successive row filters: "Norm Fit" above 0.8, then rows whose
# "Uniplot" field is not the literal "NONE".
Pharm_df <- Pharm_df[Pharm_df$`Norm Fit` > 0.8, ]
Pharm_df <- Pharm_df[Pharm_df$Uniplot != "NONE", ]
dim(Pharm_df) # author's run: 90
# Translate the "Uniplot" identifiers into gene symbols. The UniProt table is
# given a "Uniplot" column copied from "Entry Name" so both tables share a
# join key.
UniProt <- data.table::fread(
  "~/Documents/database/uniprot/uniprotkb_AND_reviewed_true_AND_model_o_2024_09_23.tsv",
  data.table = FALSE
)
UniProt$Uniplot <- UniProt$`Entry Name`
colnames(Pharm_df)
colnames(UniProt)
Pharm_filtered_df <- merge(
  Pharm_df,
  UniProt[, c("Uniplot", "Gene Names")],
  by = "Uniplot",
  all.x = TRUE
)
colnames(Pharm_filtered_df)
# Discard rows that found no partner in the UniProt table.
Pharm_filtered_df <- Pharm_filtered_df[!is.na(Pharm_filtered_df$`Gene Names`), ]
dim(Pharm_filtered_df) # author's run: 67
# Split the space-separated gene names and de-duplicate.
Pharm_target <- unique(unlist(strsplit(Pharm_filtered_df$`Gene Names`, " ")))
length(Pharm_target) # author's run: 167
write.table(
  Pharm_target,
  file = "table1/Pharm_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- CTD ----
# Read the CTD interaction export and keep each "Gene Symbol" value once.
CTD_df <- data.table::fread(
  "table/CTD_C015423_ixns_20250515053219.csv",
  data.table = FALSE
)
CTD_target <- unique(CTD_df$`Gene Symbol`)
length(CTD_target) # author's run: 37
write.table(
  CTD_target,
  file = "table1/CTD_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# ---- BatmanCTM ----
# The targets for this source are hard-coded rather than read from a file.
Batman_target <- unique(c(
  "MAPK8", "PRKAA2", "MMP9", "PRKAB1",
  "PRKAA1", "NEDD4L", "IL1B", "MAPK14",
  "APP", "ICAM1", "IRAK1", "MITF",
  "NFKB1", "IGF1", "MIR29B1", "CREB1",
  "STAT3", "NOX4", "TYRP1", "CYBB", "TNF",
  "AKT1", "BGLAP", "CD14", "VEGFA", "SNCA",
  "SOCS3", "PTGS2", "HSPA4", "HIF1A", "IL6",
  "SHH", "ABCB1", "STK11", "LBP", "MAP3K5", "MAPK1"
))
length(Batman_target) # author's run: 37
write.table(
  Batman_target,
  file = "table1/Batman_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# Paeoniflorin ----
# Collect the per-source target vectors in one named list; the names become
# the set labels of the Venn diagram. Inside list() the names have to be
# given with `=`, not `<-`.
Paeoniflorin_list <- list(
  TCMSP = TCMSP_target,
  HIT2 = Hit2_target,
  HERB = Herb_target,
  SwissTargetPrediction = Swiss_target_split,
  SuperPred = SuperPred_target,
  PharmMapper = Pharm_target,
  CTD = CTD_target,
  BATMAN = Batman_target
)
# Union of all sources, in source order, each symbol kept once.
Paeoniflorin_target <- unique(unlist(Paeoniflorin_list, use.names = FALSE))
length(Paeoniflorin_target) # author's run: 346
write.table(
  Paeoniflorin_target,
  file = "table1/Paeoniflorin_target.csv",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# NOTE(review): eight sets are passed to the Venn helper; confirm that the
# installed venn package can draw that many.
my_create_venn_diagram(
  Paeoniflorin_list,
  title_text = "Paeoniflorin Target Genes",
  width = 6,
  title_line = 1
)
# Keep the merged target vector for later scripts.
save(Paeoniflorin_target, file = "Rdata/1.Rdata")
# Function definition
#' Draw a Venn diagram of several gene sets and save it as a PDF
#'
#' @param gene_sets List of sets handed to `venn()`; one colour is used per
#'   list element.
#' @param title_text Plot title. Also used as the PDF file name (without the
#'   ".pdf" extension).
#' @param output_file Output directory, created if missing. The file path is
#'   built as `paste0(output_file, title_text, ".pdf")`, so the value should
#'   end with a path separator.
#' @param colors Optional vector of colours. When `NULL`, a built-in palette
#'   of 15 colours is used. A vector shorter than `gene_sets` is recycled.
#' @param width,height PDF page size passed to `pdf()`.
#' @param title_cex,title_font,title_col,title_line Title size, font, colour
#'   and margin line, passed to `title()`.
#' @return The value returned by `dev.off()` after the PDF device is closed.
my_create_venn_diagram <- function(gene_sets, title_text, output_file = "figure/", colors = NULL,
                                   width = 5, height = 5, title_cex = 1.5, title_font = 2,
                                   title_col = "black", title_line = -1) {
  # Fall back to the default palette when the caller supplies none.
  if (is.null(colors)) {
    colors <- c(
      "#029149", "#E0367A", "#5D90BA", "#431A3D", "#91612D",
      "#FFD121", "#D8D155", "#223D6C", "#D20A13", "#088247",
      "#11AA4D", "#7A142C", "#5D90BA", "#64495D", "#7CC767"
    )
  }
  # One colour per set. rep_len() returns the first n colours when enough are
  # available and recycles a short palette instead of producing NA colours;
  # unlike colors[1:length(x)] it is also safe for an empty list.
  set_colors <- rep_len(colors, length(gene_sets))

  # Create the output directory if it does not exist yet.
  if (!dir.exists(output_file)) {
    dir.create(output_file, recursive = TRUE)
  }
  # The title doubles as the file name.
  file_path <- paste0(output_file, title_text, ".pdf")

  # Open the PDF device and make sure it is closed again even when plotting
  # fails; otherwise the device stays open and later plots go into this file.
  grDevices::pdf(file = file_path, width = width, height = height)
  pdf_device <- grDevices::dev.cur()
  device_closed <- FALSE
  on.exit(if (!device_closed) grDevices::dev.off(pdf_device), add = TRUE)

  # Draw the Venn diagram.
  venn(gene_sets,
    col = set_colors,
    zcolor = set_colors,
    box = FALSE,
    ilabels = "counts",
    ilcs = 1.2, # size of the intersection counts
    sncs = 0.9 # size of the set names
  )
  # Add the title.
  graphics::title(
    main = title_text,
    cex.main = title_cex,
    font.main = title_font,
    col.main = title_col,
    line = title_line
  )
  # Close the PDF device explicitly so its return value is passed on.
  device_closed <- TRUE
  grDevices::dev.off(pdf_device)
}
# Reference tutorials for using the databases:
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。