import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import rankdata
如果需要以下数据,可关注公众号 HsuHeinrich,回复【数据可视化】自动获取~
# Load the raw water-point data.
data_raw = pd.read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-04/water.csv"
)

# Pre-processing: drop rows without an install year or country, bucket the
# install year into its decade, and keep the decades 1970 through 2020.
data = (
    data_raw
    .dropna(subset=["install_year", "country_name"])
    .assign(
        # Floor each year to the first year of its decade (1987 -> 1980).
        # A callable is used so the column is derived from the frame being
        # built (after dropna) instead of reaching back to data_raw.
        install_decade=lambda df: (df["install_year"] // 10) * 10,
    )
    .query("1970 <= install_decade <= 2020")
    # Explicit copy so the column assignment below writes to an independent
    # frame rather than to a filtered selection of another frame.
    .copy()
)

# Number of installations per (country, decade), broadcast to every row.
data["nb_install"] = data.groupby(["country_name", "install_decade"])["country_name"].transform("size")

# Wide format: one row per country, one column per decade. dropna() keeps only
# the countries that have a value for every decade column.
data_pivoted = pd.pivot_table(
    data, index="country_name", values="nb_install", columns="install_decade"
).dropna().reset_index()

# Back to long format: one row per (country, decade).
data = pd.melt(data_pivoted, id_vars="country_name", value_name="nb_install", var_name="decade")

# Rank countries within each decade; negating the counts makes rank 1 the
# country with the most installations.
data["country_name"] = data["country_name"].str.upper()
data["rank"] = data.groupby("decade")["nb_install"].transform(lambda x: rankdata(-x))
data = data.sort_values(["country_name", "decade"])
data.head()
image-20240129173348659
country_name:国家名称;decade:年代;nb_install:安装量;rank:排名
# All countries present in the ranked data, and the one to highlight.
COUNTRIES = data["country_name"].unique()
# NOTE(review): the index was missing from the extracted source; the first
# country is assumed here — confirm.
COUNTRY = COUNTRIES[0]

# Set up the figure.
# NOTE(review): figsize, zorder and marker size below were missing from the
# extracted source and are reconstructed presentational values — confirm.
fig, ax = plt.subplots(figsize=(6, 5))

# Invert the y axis so that smaller rank values are drawn at the top.
ax.invert_yaxis()

# Draw one line per country.
for country in COUNTRIES:
    d = data[data["country_name"] == country]
    x = d["decade"].values
    y = d["rank"].values

    if country == COUNTRY:
        # Highlighted country: coloured line with white-filled markers, given
        # explicit z-orders so it is drawn above the grey lines.
        ax.plot(x, y, color="#0b53c1", lw=2.4, zorder=10)
        ax.scatter(x, y, fc="w", ec="#0b53c1", s=60, lw=2.4, zorder=12)
    else:
        # Every other country: plain grey line.
        ax.plot(x, y, color="#BFBFBF", lw=1.5)
output_8_0
def add_label(x, y, fontsize, ax):
    """Annotate the point (x, y) with its rank and two-digit decade.

    The text has the form "#<rank> in\\n<yy>s" (for example "#3 in\\n70s"),
    where <yy> is the last two digits of the decade, matching the
    "70s".."20s" tick labels used elsewhere in this file.

    Parameters
    ----------
    x : number
        Decade value of the point (e.g. 1970).
    y : number
        Rank value of the point.
    fontsize : number
        Font size passed through to ``ax.annotate``.
    ax : object
        Axes-like object providing ``annotate``.
    """
    # Offset between the point and the label anchor, in data units. The label
    # is anchored at y - PAD with its bottom edge on the anchor.
    PAD = 0.4
    ax.annotate(
        # Drop the century digits: 1970 -> "70".
        f"#{int(y)} in\n{str(int(x))[2:]}s",
        xy=(x, y - PAD),
        ha="center",
        va="bottom",
        fontsize=fontsize,
        fontname="Lato",
        # NOTE(review): zorder was missing from the extracted source; 12 is a
        # reconstructed value that keeps the label above the lines — confirm.
        zorder=12,
    )
# Set up the figure.
# NOTE(review): figsize, zorder, marker size and font sizes/weights below were
# missing from the extracted source and are reconstructed presentational
# values — confirm.
fig, ax = plt.subplots(figsize=(6, 5))
ax.invert_yaxis()

for country in COUNTRIES:
    d = data[data["country_name"] == country]
    x = d["decade"].values
    y = d["rank"].values

    if country == COUNTRY:
        ax.plot(x, y, color="#0b53c1", lw=2.4, zorder=10)
        ax.scatter(x, y, fc="w", ec="#0b53c1", s=60, lw=2.4, zorder=12)
        # Label the first and the last point of the highlighted line.
        add_label(x[0], y[0], 16, ax)
        add_label(x[-1], y[-1], 16, ax)
    else:
        ax.plot(x, y, color="#BFBFBF", lw=1.5)

# Remove the y ticks.
ax.set_yticks([])

# One x tick per decade in the filtered 1970-2020 range.
ax.set_xticks([1970, 1980, 1990, 2000, 2010, 2020])

# Short decade labels for the x ticks.
ax.set_xticklabels(
    ["70s", "80s", "90s", "00s", "10s", "20s"],
    fontsize=16,
    fontfamily="Inconsolata",
)

# Hide the bottom tick marks.
ax.tick_params(bottom=False)

# Remove the frame around the axes.
ax.set_frame_on(False)

# Use the highlighted country as the title.
ax.set_title(COUNTRY, fontfamily="Inconsolata", fontsize=24, fontweight=500)
output_11_0
def plot_country(country, data, annotate, ax):
    """Draw the rank-by-decade lines on ``ax`` with one country highlighted.

    Every country found in ``data`` is drawn as a grey line, except
    ``country``, which is drawn as a blue line with white-filled markers.

    Parameters
    ----------
    country : str
        Value of ``country_name`` to highlight.
    data : pandas.DataFrame
        Frame with the columns ``country_name``, ``decade`` and ``rank``.
    annotate : bool
        When true, the first and last points of the highlighted line are
        labelled via ``add_label``.
    ax : object
        Axes to draw on.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    # Iterate over the countries in the frame that was passed in (one groupby
    # pass, in order of first appearance) instead of relying on a module-level
    # list and re-filtering the whole frame for every country.
    for name, group in data.groupby("country_name", sort=False):
        x = group["decade"].values
        y = group["rank"].values

        if name != country:
            ax.plot(x, y, color="#BFBFBF", lw=1.5)
            continue

        # NOTE(review): zorder, marker size and the font sizes/weights in this
        # function were missing from the extracted source and are
        # reconstructed presentational values — confirm.
        ax.plot(x, y, color="#0b53c1", lw=2.4, zorder=10)
        ax.scatter(x, y, fc="w", ec="#0b53c1", s=60, lw=2.4, zorder=12)
        if annotate:
            # Label the first and the last point of the highlighted line.
            add_label(x[0], y[0], 10, ax)
            add_label(x[-1], y[-1], 10, ax)

    # Axis cosmetics: no y ticks, one x tick per decade with short labels,
    # no bottom tick marks, no frame.
    ax.set_yticks([])
    ax.set_xticks([1970, 1980, 1990, 2000, 2010, 2020])
    ax.set_xticklabels(
        ["70s", "80s", "90s", "00s", "10s", "20s"],
        fontsize=10,
        fontfamily="Inconsolata",
    )
    ax.tick_params(bottom=False)
    ax.set_frame_on(False)
    ax.set_title(country, fontfamily="Inconsolata", fontsize=14, fontweight=500)
    return ax
# Set up the grid of panels; x and y axes are shared across all panels.
# NOTE(review): the grid shape, figure width and all font sizes below were
# missing from the extracted source and are reconstructed values — confirm.
# zip() stops at the shorter input, so at most rows * cols countries are drawn.
fig, axes = plt.subplots(2, 5, sharex=True, sharey=True, figsize=(14, 7.5))

for idx, (ax, country) in enumerate(zip(axes.ravel(), COUNTRIES)):
    # Only the first panel gets the text annotations.
    annotate = idx == 0
    plot_country(country, data, annotate, ax)

# Invert the y axis exactly once, outside the loop: the y axis is shared, so
# flipping it once per panel would undo itself for an even number of panels.
axes[0, 0].invert_yaxis()

# Adjust the spacing between and around the panels.
fig.subplots_adjust(wspace=0.1, left=0.025, right=0.975, bottom=0.11, top=0.82)

# Figure title.
fig.text(
    x=0.5,
    y=0.92,
    s="RANKING SOME COUNTRIES BY THE NUMBER\nOF WATER SOURCES INSTALLATIONS BY DECADE",
    ha="center",
    va="center",
    ma="center",
    fontsize=26,
    fontweight="bold",
    fontname="Inconsolata",
)

# Credits: data source.
fig.text(
    x=0.975,
    y=0.05,
    s="Data from Water Point Data Exchange",
    ha="right",
    ma="right",
    fontsize=11,
)

# Credits: author.
fig.text(
    x=0.975,
    y=0.03,
    s="@issa_madjid",
    ha="right",
    ma="right",
    fontsize=12,
    fontweight="bold",
)

# Twitter logo glyph; it only renders if the "Font Awesome 5 Brands" font
# requested below is available to matplotlib.
twitter_symbol = "\uf099"
fig.text(
    x=0.925,
    y=0.03,
    s=twitter_symbol,
    ha="right",
    ma="right",
    fontsize=16,
    fontweight="bold",
    fontfamily="Font Awesome 5 Brands",
)

# Figure background colour.
fig.set_facecolor("#f9fbfc")
output_14_0
参考:Multi panel highlighted lineplots with Matplotlib[1]
共勉~
[1] Multi panel highlighted lineplots with Matplotlib: https://python-graph-gallery.com/web-highlighted-lineplot-with-faceting/