
A roundup of your "dream journal": how many papers it has published, and which author has published the most

生信菜鸟团
Published 2023-08-23 01:06:10

1-Background

Thanks to teacher Zeng for yet another round of apprentice homework~

The inspiration for the assignment comes from these two posts:

Who has published the most in the authoritative bioinformatics journal Bioinformatics (qq.com)

Top 10 first authors by number of papers in Bioinformatics (qq.com)

The assignment:

Run the same statistics, just swap in a different journal. OK~

2-Code walkthrough

Code (R):
library(pubmedR)
api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split the query into two date ranges
query = '("Bioinformatics (Oxford, England)"[Journal]) AND (("1998"[Date - Publication] : "2014"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Bioinformatics (Oxford, England)"[Journal]) AND (("2015"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

df = m
df = df[df$DT == "JOURNAL ARTICLE", ]   # keep only journal articles
fa = gsub(";.*$", "", df$AF)            # first author = text before the first ";"
sort(table(fa))

What the code does:

Collect the first author of every article Bioinformatics has published since its launch, then rank the authors by how many papers they have.

With the same setup, let's see who has published the most in other top journals.

Hehe, and along the way we also get each journal's total article count.
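
Since the exact same download, convert, filter and count steps are repeated for every journal below, here is a minimal helper sketch that wraps them up. It only uses the pubmedR calls already shown above (pmQueryTotalCount, pmApiRequest, pmApi2df); the function name count_first_authors and the year_windows argument are made up for this sketch, and the year windows still have to be picked by hand so that each one stays under PubMed's roughly 10,000-record download limit.

library(pubmedR)

# Hypothetical helper: download one journal in several year windows and
# rank its first authors by number of records.
count_first_authors <- function(journal, year_windows, api_key = NULL, top = 15) {
  frames <- lapply(year_windows, function(yr) {
    query <- sprintf('("%s"[Journal]) AND (("%s"[Date - Publication] : "%s"[Date - Publication]))',
                     journal, yr[1], yr[2])
    res <- pmQueryTotalCount(query = query, api_key = api_key)
    D   <- pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
    pmApi2df(D)
  })
  df <- do.call(rbind, frames)
  df <- df[df$DT == "JOURNAL ARTICLE", ]   # keep only journal articles
  fa <- gsub(";.*$", "", df$AF)            # first author = text before the first ";"
  head(sort(table(fa), decreasing = TRUE), top)
}

# Example: the Bioinformatics query above, expressed as two windows
# count_first_authors("Bioinformatics (Oxford, England)",
#                     list(c("1998", "2014"), c("2015", "2023")))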

Let's start with CNS (Cell, Nature, Science)~

3-CNS

3.1- Cell

Founded in 1974, 49 years ago.

As of this search, a bit over 20,000 articles in total.

Ranking the first authors; code below~

#"Cell"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into three date ranges
query = '("Cell"[Journal]) AND (("1974"[Date - Publication] : "1996"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Cell"[Journal]) AND (("1997"[Date - Publication] : "2015"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Cell"[Journal]) AND (("2016"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D3 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m3 = pmApi2df(D3)
m = rbind(m1, m2,m3)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Cell.Rdata")
#load(file="Cell.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                 76
# HUNTER, T          15
# BONETTA, LAURA     11
# LAWRENCE, P A      10
# LEWIN, B           10
# SPIEGELMAN, B M    10
# WEINTRAUB, H       10
# ORCI, L             9
# KOZAK, M            8
# VALE, R D           8
# CLEVELAND, D W      7
# FUCHS, E            7
# HENIKOFF, S         7
# KMIEC, E B          7
# LAZARIDES, E        7
# (the "" entry corresponds to records with an empty first-author field)

Run results:

Let's look up the top three and see what fields these big names work in.

3.1.1 HUNTER, T: protein research

3.1.2 BONETTA, LAURA: science commentary?

3.1.3 LAWRENCE, P A: Drosophila

3.2- Science

Founded in 1880, 143 years in total.

Roughly 180,000 articles to date.

Code below:

It is a bit long...

#"Science (New York, N.Y.)"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, hence the many date-range batches below
query = '("Science (New York, N.Y.)"[Journal]) AND (("1880"[Date - Publication] : "1893"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1894"[Date - Publication] : "1911"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1912"[Date - Publication] : "1927"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D3 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)
save(D1,D2,D3,file = "Science_1_3.Rdata")

query = '("Science (New York, N.Y.)"[Journal]) AND (("1928"[Date - Publication] : "1939"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D4 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1940"[Date - Publication] : "1950"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D5 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1951"[Date - Publication] : "1961"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D6 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1962"[Date - Publication] : "1966"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D7 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1967"[Date - Publication] : "1971"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D8 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1972"[Date - Publication] : "1976"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D9 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1977"[Date - Publication] : "1980"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D10 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1981"[Date - Publication] : "1984"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D11 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1985"[Date - Publication] : "1988"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D12 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1989"[Date - Publication] : "1992"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D13 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1993"[Date - Publication] : "1996"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D14 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("1997"[Date - Publication] : "2001"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D15 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

# query = '("Science (New York, N.Y.)"[Journal]) AND (("1997"[Date - Publication] : "2001"[Date - Publication]))'
# res <- pmQueryTotalCount(query = query, api_key = api_key)
# D16 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("2002"[Date - Publication] : "2005"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D17 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("2006"[Date - Publication] : "2009"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D18 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("2010"[Date - Publication] : "2013"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D19 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("2014"[Date - Publication] : "2017"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D20 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Science (New York, N.Y.)"[Journal]) AND (("2018"[Date - Publication] : "2021"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D21 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)
query = '("Science (New York, N.Y.)"[Journal]) AND (("2022"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D22 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)
load(file = "Science_1_3.Rdata")
load(file = "Science_4_22.Rdata")
save(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,
     D11,D12,D13,D14,D15,D17,D18,
     D19,D20,D21,D22,file = "Science_all.Rdata")
m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m3 = pmApi2df(D3)
m4 = pmApi2df(D4)
m5 = pmApi2df(D5)
m6 = pmApi2df(D6)
m7 = pmApi2df(D7)
m8 = pmApi2df(D8)
m9 = pmApi2df(D9)
m10 = pmApi2df(D10)
m11 = pmApi2df(D11)
m12 = pmApi2df(D12)
m13 = pmApi2df(D13)
m14 = pmApi2df(D14)
m15 = pmApi2df(D15)
# m16 = pmApi2df(D16)
m17 = pmApi2df(D17)
m18 = pmApi2df(D18)
m19 = pmApi2df(D19)
m20 = pmApi2df(D20)
m21 = pmApi2df(D21)
m22 = pmApi2df(D22)
m = rbind(m1, m2, m3,m4,m5,m6,m7,m8,
          m9, m10, m11,m12,m13,m14,m15,
          m17, m18, m19,m20,m21,m22)
df=m
df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Science_.Rdata")
#load(file="Nature communications.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                 18482
# WALSH, J             829
# HOLDEN, C            662
# KERR, R A            614
# ABELSON, P H         544
# MARSHALL, E          532
# CARTER, L J          431
# SMITH, R J           390
# NORMAN, C            389
# WALDROP, M M         377
# ROBINSON, A L        307
# DICKSON, D           271
# WADE, N              268
# GREENBERG, D S       262
# MAUGH, T H           241
# (the "" entry corresponds to records with an empty first-author field)
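
All those nearly identical blocks above could also be written as a loop over the manually chosen year windows. A compact sketch, reusing the same pubmedR calls; the window boundaries are simply copied from the code above:

windows <- list(c("1880","1893"), c("1894","1911"), c("1912","1927"), c("1928","1939"),
                c("1940","1950"), c("1951","1961"), c("1962","1966"), c("1967","1971"),
                c("1972","1976"), c("1977","1980"), c("1981","1984"), c("1985","1988"),
                c("1989","1992"), c("1993","1996"), c("1997","2001"), c("2002","2005"),
                c("2006","2009"), c("2010","2013"), c("2014","2017"), c("2018","2021"),
                c("2022","2023"))

frames <- lapply(windows, function(yr) {
  query <- sprintf('("Science (New York, N.Y.)"[Journal]) AND (("%s"[Date - Publication] : "%s"[Date - Publication]))',
                   yr[1], yr[2])
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  pmApi2df(pmApiRequest(query = query, limit = res$total_count, api_key = api_key))
})
m <- do.call(rbind, frames)   # same combined data frame as the rbind above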

3.2.1 WALSH, J: science journalism

829 papers? A namesake collision? The publication dates cluster in 1962-1989, so probably not a namesake.

Skimming a few of the articles, he seems to write popular-science pieces, or possibly animal-classification notes.

There are also some social-science commentary pieces.

Three Science items on a single results page.

My guess is that WALSH, J was a science writer or reporter at Science.

Which would mean that publishing in Science was literally his day job??
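
One way to sanity-check that guess from the data itself is to look at the PubMed document-type breakdown before the DT filter, and at the titles credited to WALSH, J. A quick sketch, using the combined data frame m built above and assuming the bibliometrix-style TI (title) column that pmApi2df normally produces:

# document types in the combined Science download, before filtering
sort(table(m$DT), decreasing = TRUE)

# titles of the records whose first author prints as "WALSH, J", for manual inspection
if ("TI" %in% colnames(df)) head(df$TI[fa == "WALSH, J"], 10)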

3.3- Nature

The PubMed records used here start in 1945, i.e. 78 years' worth (the journal itself dates back to 1869).

Close to 130,000 articles.

Code:

#"Nature"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so the query is split into sixteen date ranges
query = '("Nature"[Journal]) AND (("1945"[Date - Publication] : "1957"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1958"[Date - Publication] : "1963"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1964"[Date - Publication] : "1968"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D3 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1967"[Date - Publication] : "1971"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D4 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1972"[Date - Publication] : "1980"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D5 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1981"[Date - Publication] : "1988"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D6 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1989"[Date - Publication] : "1989"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D16 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1990"[Date - Publication] : "1996"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D7 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("1997"[Date - Publication] : "2001"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D8 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2002"[Date - Publication] : "2005"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D9 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2006"[Date - Publication] : "2009"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D10 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2010"[Date - Publication] : "2013"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D11 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2014"[Date - Publication] : "2016"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D12 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2017"[Date - Publication] : "2019"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D13 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2020"[Date - Publication] : "2022"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D14 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature"[Journal]) AND (("2023"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D15 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)



save(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,
     D11,D12,D13,D14,D15,D16,file = "Nature.Rdata")


m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m3 = pmApi2df(D3)
m4 = pmApi2df(D4)
m5 = pmApi2df(D5)
m6 = pmApi2df(D6)
m7 = pmApi2df(D7)
m8 = pmApi2df(D8)
m9 = pmApi2df(D9)
m10 = pmApi2df(D10)

m11 = pmApi2df(D11)
m12 = pmApi2df(D12)
m13 = pmApi2df(D13)
m14 = pmApi2df(D14)
m15 = pmApi2df(D15)
m16 = pmApi2df(D16)
m = rbind(m1, m2, m3,m4,m5,m6,m7,m8,
          m9, m10, m11,m12,m13,m14,m15,m16)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Nature_fa.Rdata")
#load(file="Nature_fa.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                      2735
# SMAGLIK, PAUL            229
# GEWIN, VIRGINIA          100
# EISENSTEIN, MICHAEL       79
# POWELL, KENDALL           78
# SAVAGE, NEIL              62
# WOOLSTON, CHRIS           59
# SCHIERMEIER, QUIRIN       57
# LEDFORD, HEIDI            51
# ABBOTT, ALISON            47
# CYRANOSKI, DAVID          46
# DOLGIN, ELIE              44
# PERKEL, JEFFREY M / DANCE, AMBER / WITZE, ALEXANDRA (counts cut off in the pasted output)
# (the "" entry corresponds to records with an empty first-author field)

The top 15.

Out of caution, let's search the number one, SMAGLIK, PAUL.

3.3.1 SMAGLIK, PAUL: most likely a journalist

Two of his reports as examples:

1 - related to single-cell sequencing

2 - related to neuroscience

...Presumably a journalist following these stories.

So here is someone whose day job is publishing in Nature.

Doesn't that mean...

The secret to publishing in top journals is... if you can't get in, join the staff.

4-Nature subjournals

4.1 New subjournals

4.1.1- Nature machine intelligence

A new Nature subjournal, launched in 2019, so 4 years old.

Only about 60 articles in total so far.

Code (R):
##"Nature machine intelligence"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download; a single query is enough here
query = '("Nature machine intelligence"[Journal]) AND (("2019"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m=m1
df=m
df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)

Results:

4.1.1.1 LIU, RUOQI

4.1.2- Nature Computational Science

Launched in 2021, another new Nature subjournal, 2 years old.

37 articles so far, and no impact factor yet.

Code (R):
####"Nature Computational Science"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download; a single query is enough here
query = '("Nature Computational Science"[Journal]) AND (("2019"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m=m1
df=m
df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)
# fa
# CHIRIGATI, FERNANDO    2
# RASTOGI, ANANYA        2
# ""                     1
# AVANTS, BRIAN B        1
# BRYAN, J SHEPARD       1
# CHAMBERLAND, MAXIME    1
# GALA, ROHAN            1
# GAMAZON, ERIC R        1
# GUAN, YUANFANG         1
# HASEEB, MUHAMMAD       1
# HOFFECKER, IAN T       1
# HOU, HAO               1
# JIN, CHONG             1
# JING, XIAOYANG         1
# JOSEPH, JERELLE A      1
# (the "" entry corresponds to a record with an empty first-author field)

Results:

4.1.2.1 CHIRIGATI, FERNANDO

A glance at the PDF shows it is not a research article in the usual sense.

4.1.2.2 RASTOGI, ANANYA

The runner-up is a similar case.

PDF contents

I will not dig any further.

5-Established Nature subjournals

5.1- Nature communications

Launched in 2010, 13 years.

More than 50,000 articles in total.

Code:

##"Nature communications"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so the query is split into eight date ranges
query = '("Nature communications"[Journal]) AND (("2010"[Date - Publication] : "2015"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2016"[Date - Publication] : "2017"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2018"[Date - Publication] : "2018"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D3 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2019"[Date - Publication] : "2019"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D4 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2020"[Date - Publication] : "2020"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D5 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2021"[Date - Publication] : "2021"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D6 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2022"[Date - Publication] : "2022"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D7 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nature communications"[Journal]) AND (("2023"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D8 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)
#save(D1,D2,D3,D4,D5,D6,D7,D8,file = "Nature communications.Rdata")
#save(D4,file = "Nature communications_D4.Rdata")
load(file = "Nature communications.Rdata")
load(file = "Nature communications_D4.Rdata")
m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m3 = pmApi2df(D3)
m4 = pmApi2df(D4)
m5 = pmApi2df(D5)
m6 = pmApi2df(D6)
m7 = pmApi2df(D7)
m8 = pmApi2df(D8)
m = rbind(m1, m2, m3,m4,m5,m6,m7,m8)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Nature communications.Rdata")
#load(file="Nature communications.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# YANG, YANG    20
# LIU, YANG     17
# ZHANG, LEI    15
# LIU, WEI      14
# WANG, YING    14
# ZHANG, WEI    14
# WANG, WEI     13
# WANG, YANG    13
# ZHANG, TAO    13
# LI, JUN       12
# WANG, QI      12
# ZHANG, YI     12
# LI, JIE       11
# LI, YAN       11
# LIU, YI       11

Hmm? They all look like Chinese names.

Heh, not bad at all~

Let's follow the trail and see which field this prolific Chinese author works in.

Digging in~

5.1.1 Turns out it is a name collision

Below are three different people who all publish as YANG, YANG. YANG, YANG no. 1

YANG, YANG no. 2

YANG, YANG no. 3

The runner-up name is shared by several people as well.
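
A quick way to confirm the namesake problem without leaving R is to pull the titles (and affiliations, if the conversion kept them) of every record credited to one of these names and skim them. A sketch, assuming the bibliometrix-style TI and C1 columns that pmApi2df usually returns:

# all Nature Communications records whose first author prints as "YANG, YANG"
idx <- which(fa == "YANG, YANG")
if ("TI" %in% colnames(df)) head(df$TI[idx], 20)   # topics span very different fields, i.e. several people
if ("C1" %in% colnames(df)) head(df$C1[idx])       # affiliations, when available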

5.2- Nature methods

Founded in 2004, 19 years.

A bit over 5,000 articles~

Code:

##"Nature methods"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download; a single query covers it here
query = '("Nature methods"[Journal]) AND (("2004"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m=m1
df=m
df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)

Hmm, at this order of magnitude it is probably journalists again; let's quickly search one of them.

5.2.1 VOGT, NINA

Yep, a journalist...

5.3- Nature genetics

Founded in 1992, 31 years.

Close to 9,000 articles in total.

Code (R):
##"Nature genetics"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download; a single query covers it here
query = '("Nature genetics"[Journal]) AND (("1992"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)
#save(D1,file = "Nature_genetics.Rdata")
m1 = pmApi2df(D1)
m=m1
df=m
df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Nature_genetics.Rdata")
#load(file="Nature_genetics.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                        135
# ALAM, ORNOB                 9
# DANOVI, SAFIA               9
# FLETCHER, MICHAEL           9
# FAIAL, TIAGO                8
# LI, WEI                     8
# VOGAN, KYLE                 8
# GUDMUNDSSON, JULIUS         7
# MCCARROLL, STEVEN A         6
# BARRETT, JEFFREY C          5
# CONRAD, DONALD F            5
# GUDBJARTSSON, DANIEL F      5
# SULEM, PATRICK              5
# TURNBULL, CLARE             5
# XU, X                       5
# (the "" entry corresponds to records with an empty first-author field)

Results:

This order of magnitude looks normal.

5.3.1 ALAM, ORNOB

Hmm, OK, these do not look like research papers either.

Next one.

5.4- Nature biotechnology

Founded in 1996, 27 years.

More than 10,000 articles.

Code (R):
##Nature biotechnology 
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into two date ranges
query = '("Nature biotechnology"[Journal]) AND (("1996"[Date - Publication] : "2010"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)


query = '("Nature biotechnology"[Journal]) AND (("2011"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Nature_biotechnology s.Rdata")
#load(file="Nature_biotechnology s.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                     774
# FRANCISCO, MICHAEL      50
# JACOBS, TOM             48
# HUGGETT, BRADY          47
# DEWITT, N               43
# DEFRANCESCO, LAURA      41
# PERSIDIS, A             37
# DOVE, A                 32
# LAWRENCE, STACY         31
# SHERIDAN, CORMAC        31
# FREDERICKSON, R         30
# HODGSON, J              29
# GARBER, KEN             20
# HOYLE, R                20
# RATNER, MARK            17
# (the "" entry corresponds to records with an empty first-author field)

The top name turned out to be a journalist.

OK, next.

5.5- Nature medicine

Founded in 1995, 28 years.

More than 10,000 articles.

Code (R):
##"Nature medicine"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into two date ranges
query = '("Nature medicine"[Journal]) AND (("1995"[Date - Publication] : "2014"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)


query = '("Nature medicine"[Journal]) AND (("2015"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
save(fa,file = "Nature medicine.Rdata")
load(file="Nature medicine.Rdata")
head(sort(table(fa),decreasing = T),15)
# fa
# ""                       195
# STOWER, HANNAH           164
# O'LEARY, KAREN            12
# BIRMINGHAM, K             11
# CARVALHO, THIAGO          10
# CHAKRADHAR, SHRADDHA      10
# GOLDSTEIN, JOSEPH L        5
# NOVAK, K                   5
# WILLYARD, CASSANDRA        5
# BACH, F H                  4
# BALLMAIER, M               4
# BOSCH, X                   4
# GRAINGER, D J              4
# KEENER, AMANDA B           4
# READY, T                   4
# (the "" entry corresponds to records with an empty first-author field)

At this order of magnitude I have lost interest in looking up the top name.

6-Other dream journals

6.1- Bioinformatics, the original example~

Founded in 1998, 25 years.

About 17,000 articles.

Code (R):
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into two date ranges
query = '("Bioinformatics (Oxford, England)"[Journal]) AND (("1998"[Date - Publication] : "2014"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Bioinformatics (Oxford, England)"[Journal]) AND (("2015"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

## the following line differs from the referenced script
df = m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
## the following line differs from the referenced script: only the top 15 are kept
head(sort(table(fa), decreasing = T), 15)

Now the big names appear~ I cross-checked the retrieval statistics and nothing looks off (one quick way to do that check is sketched below). Then on to number one~
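
For reference, the cross-check can be done directly on the objects created above, by comparing how many rows were downloaded per batch with what survives the document-type filter. A minimal sketch:

# rows downloaded per batch and after combining / filtering
c(batch_1998_2014 = nrow(m1),
  batch_2015_2023 = nrow(m2),
  combined        = nrow(m),
  journal_article = nrow(df))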

6.1.1 FOGG, CHRISTIANA N: a journalist

Again, not research papers.

6.1.2 LI, HENG: a genuine heavyweight, the author of SAMtools

Wow, a real big shot.

6.2- BMC bioinformatics

Founded in 2000, 23 years.

More than 10,000 articles.

Code (R):
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into two date ranges
query = '("BMC bioinformatics"[Journal]) AND (("1998"[Date - Publication] : "2014"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("BMC bioinformatics"[Journal]) AND (("2015"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)

Results:

6.2.1 WINTERS-HILT, STEPHEN: works on algorithms

These are real research papers, so that is genuinely impressive.

6.3- Nucleic acids research

Founded in 1974, 49 years.

Close to 50,000 articles.

Code (R):
##"Nucleic acids research"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into six date ranges
query = '("Nucleic acids research"[Journal]) AND (("1974"[Date - Publication] : "1987"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nucleic acids research"[Journal]) AND (("1988"[Date - Publication] : "1993"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nucleic acids research"[Journal]) AND (("1994"[Date - Publication] : "2004"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D3 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nucleic acids research"[Journal]) AND (("2005"[Date - Publication] : "2012"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D4 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nucleic acids research"[Journal]) AND (("2013"[Date - Publication] : "2018"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D5 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Nucleic acids research"[Journal]) AND (("2019"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D6 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m3 = pmApi2df(D3)
m4 = pmApi2df(D4)
m5 = pmApi2df(D5)
m6 = pmApi2df(D6)
m = rbind(m1, m2, m3,m4,m5,m6)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)

Looking up the author:

6.3.1 NAKAMURA, Y: many articles of the same type

There is a peak in 1988.

They are research articles, but all of the same type.
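
That 1988 peak can also be read straight from the downloaded records by tabulating this author's publication years. A sketch, assuming the converted data frame keeps the bibliometrix-style PY (publication year) column:

# publication-year distribution for records whose first author prints as "NAKAMURA, Y"
if ("PY" %in% colnames(df)) table(df$PY[fa == "NAKAMURA, Y"])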

6.4- Genome biology

Founded in 2000, 23 years.

More than 5,000 articles.

Code (R):
#"Genome biology"[Journal]
rm(list = ls())
library(pubmedR)

api_key = NULL

# PubMed allows fewer than 10,000 records per download, so split into two date ranges
query = '("Genome biology"[Journal]) AND (("1998"[Date - Publication] : "2014"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D1 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

query = '("Genome biology"[Journal]) AND (("2015"[Date - Publication] : "2023"[Date - Publication]))'
res <- pmQueryTotalCount(query = query, api_key = api_key)
D2 <- pmApiRequest(query = query, limit = res$total_count, api_key = NULL)

m1 = pmApi2df(D1)
m2 = pmApi2df(D2)
m = rbind(m1, m2)

df=m

df = df[df$DT == "JOURNAL ARTICLE", ]
fa = gsub(";.*$", "", df$AF)
head(sort(table(fa),decreasing = T),15)

Looking up the author:

6.4.1 PETSKO, GREGORY A: a journalist

Judging by the titles, these read like journalism rather than research articles.

Originally published 2023-08-14 in the 生信菜鸟团 WeChat public account.
