数据来源是
http://www.espn.com/nba/salaries/_/year/2020
我写了一个简单的 Python 脚本，用来爬取 2000 年到 2020 年每个赛季 NBA 球员薪水排行榜的前 10 名：
import math
import requests
from bs4 import BeautifulSoup
headers = {"User-Agent":'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
# Scrape the first rows of ESPN's NBA salary table for every season from
# 2000 to 2020 and write them to a tab-separated file, one line per player.
# Relies on `requests`, `BeautifulSoup` and `headers` defined above.
TOP_N = 10  # number of player rows kept per season

years = list(range(2000, 2021))

# `with` closes the file even if a request or a parse step raises part-way
# through; an explicit encoding keeps the output independent of the locale.
with open('NBA_salary_3.txt', 'w', encoding='utf-8') as fw:
    fw.write('%s\t%s\t%s\t%s\t%s\t%s\n' % ("year", "rk", "player", "position", "team", "salary"))
    for year in years:
        URL = "http://www.espn.com/nba/salaries/_/year/" + str(year)
        print(URL)
        # A timeout stops a stalled connection from hanging the whole run;
        # raise_for_status() surfaces HTTP errors instead of parsing an
        # error page as if it were the salary table.
        page = requests.get(URL, headers=headers, timeout=30)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        # The first <tr> is skipped (presumably the column-header row);
        # the following TOP_N rows are the ones written out.
        for row in soup.find_all('tr')[1:TOP_N + 1]:
            all_td = row.find_all('td')
            rk = all_td[0].get_text()
            player_position = all_td[1].get_text()
            # The cell looks like "Name, POS". Split on the last comma so a
            # comma inside the name does not truncate it, and tolerate a
            # cell that has no position at all.
            if "," in player_position:
                player, position = player_position.rsplit(",", 1)
            else:
                player, position = player_position, ""
            player = player.strip()
            position = position.strip()
            team = all_td[2].get_text()
            # Strip the currency symbol and thousands separators so the
            # column can be read back as a plain number.
            salary = all_td[3].get_text().replace("$", "").replace(",", "")
            fw.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (str(year), rk, player, position, team, salary))
library(ggplot2)

# Horizontal bar chart of one top-10 salary table: one bar per rank, each
# labelled with its salary.
# NOTE(review): `df1` is not defined in this section -- presumably one
# season's rows with `rk`, `player` and `salary` columns; confirm.
rank_breaks <- 1:10

ggplot(df1, aes(x = rk, y = salary)) +
  geom_col(aes(fill = player)) +
  # hjust is a constant, so it is set as a parameter rather than inside aes()
  geom_label(aes(label = paste0(salary, "万")), hjust = 1) +
  theme_bw() +
  # Look each label up by rank so the axis stays correct even when the rows
  # of df1 are not already sorted by rk.
  scale_x_continuous(
    breaks = rank_breaks,
    labels = df1$player[match(rank_breaks, df1$rk)]
  ) +
  coord_flip() +
  labs(x = "", y = "") +
  theme(legend.position = "none")
image.png
使用的工具是R语言的gganimate包
参考的文章是
https://datascienceplus.com/how-to-build-animated-bar-plots-using-r/
用到的代码是
# Load the scraped table: tab-separated, with a header row, keeping text
# columns as character. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
df <- read.csv(
  "NBA_salary_3.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Rescale salary to units of 10,000 so the bar labels stay short.
df$salary <- df$salary / 10000
head(df)
library(tidyquant)

# Build one fill colour per distinct player from the first ten entries of
# tidyquant's dark palette.
x1 <- palette_dark()
# matrix() discards the names of its input, leaving a plain vector
# (presumably so the manual fill scale assigns colours by position).
colors <- matrix(x1)[, 1][1:10]

# Draw exactly as many colours as there are players instead of a hard-coded
# count, so the manual fill scale never runs short when the data changes.
# Sampling is random and unseeded: colours differ between runs.
n_players <- length(unique(df$player))
colors <- sample(colors, n_players, replace = TRUE)
colors
library(ggplot2)

# Static "bar chart race" frame: one horizontal tile per player, positioned
# by rank, with the player name to the left of the bar and the salary value
# at its right end. The animation step later adds a transition to this plot.
staticplot <- ggplot(
  df,
  aes(
    rk,
    group = player,
    fill = as.factor(player),
    color = as.factor(player)
  )
) +
  # A tile centred at salary / 2 with height = salary spans 0..salary,
  # i.e. it draws a bar.
  geom_tile(
    aes(y = salary / 2, height = salary, width = 0.9),
    alpha = 0.8, color = NA
  ) +
  geom_text(
    aes(y = 0, label = paste(player, " ")),
    vjust = 0.2, hjust = 1, color = "black"
  ) +
  # hjust is a constant, so it is a parameter rather than an aesthetic
  geom_text(aes(y = salary, label = salary), hjust = 0, color = "black") +
  scale_fill_manual(values = colors) +
  # clip = "off" lets the name labels draw outside the panel, in the margin
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  # reversed so that rank 1 ends up at the top after the flip
  scale_x_reverse() +
  # "none" replaces the deprecated guides(<scale> = FALSE) form
  guides(color = "none", fill = "none") +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept so older installations still work.
    panel.grid.major.x = element_line(size = .1, color = "grey"),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    plot.title = element_text(
      size = 25, hjust = 0.5, face = "bold", colour = "grey", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 18, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.caption = element_text(
      size = 8, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.background = element_blank(),
    # wide left margin (4 cm) leaves room for the player names
    plot.margin = margin(2, 2, 2, 4, "cm")
  )
staticplot
library(gganimate)
# The package index is only useful when someone is at the console.
if (interactive()) help(package = "gganimate")

# Animate the static plot across seasons: each year is one state, and the
# value axis rescales to follow the data while the rank axis stays fixed.
anim <- staticplot +
  transition_states(year, transition_length = 4, state_length = 1) +
  view_follow(fixed_x = TRUE) +
  labs(
    title = 'NBA player Salary : {closest_state}',
    subtitle = "Top 10 Players",
    caption = "Data Source: http://www.espn.com/nba/salaries"
  )

# The original call passed nframes = 300 together with fps = 10 and
# duration = 20, which contradict each other (10 fps * 20 s = 200 frames).
# Only fps and duration are given here, so the frame count is derived from
# them. NOTE(review): if a 30 s clip was intended, set duration = 30.
for_mp4 <- animate(
  anim,
  fps = 10, duration = 20,
  width = 1200, height = 1000,
  renderer = ffmpeg_renderer()
)
anim_save("animation_6.mp4", animation = for_mp4)