import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
以下数据如果有需要的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~
# Source files from the TidyTuesday 2021-08-24 data set.
LEMUR_DATA_URL = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-24/lemur_data.csv"
TAXONOMY_URL = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-24/taxonomy.csv"

# Lemur records, parsed with the pure-Python engine and decoded as ISO-8859-1.
lemurs = pd.read_csv(LEMUR_DATA_URL, engine="python", encoding="ISO-8859-1")
# Taxonomy table, read with pandas' default settings.
taxonomy = pd.read_csv(TAXONOMY_URL)
# Summarise the raw records into one row per (taxon, sex) pair holding the
# largest recorded age and the number of rows in the group.
data = (
    lemurs
    # Collapse repeated rows that share id, taxon, sex and maximum age.
    .drop_duplicates(["dlc_id", "taxon", "sex", "age_max_live_or_dead_y"])
    # Discard rows whose sex is recorded as 'ND'.
    .query("sex != 'ND'")
    .groupby(["taxon", "sex"])
    .agg(
        # Use the string alias "max" rather than the builtin `max`: recent
        # pandas versions warn about builtin callables here and will stop
        # mapping them onto the optimised groupby reduction.
        age_max=pd.NamedAgg("age_max_live_or_dead_y", "max"),
        # Group size (number of rows per taxon/sex pair).
        count=pd.NamedAgg("age_max_live_or_dead_y", "size"),
    )
    .reset_index()
)
# Notebook preview of the first rows (no effect when run as a script).
data.head()
image-20240129174819932
# Reshape to one row per taxon, spreading age_max and count across the
# values of "sex" as separate columns.
data = data.pivot_table(
    index="taxon", columns="sex", values=["age_max", "count"]
).reset_index()
# Flatten the two-level column labels by joining the levels with "_" and
# trimming the stray underscore left where one level is empty.
data.columns = ["_".join(levels).strip("_") for levels in data.columns]
# Rename the taxon code CMED to CMEAD (presumably so that it matches the
# code used in `taxonomy` -- verify against that table).
is_cmed = data["taxon"] == "CMED"
data["taxon"] = np.where(is_cmed, "CMEAD", data["taxon"])
# Attach the taxonomy columns; a left join keeps every taxon row of `data`.
data = data.merge(taxonomy, how="left", on="taxon")
# Notebook preview of the first rows (no effect when run as a script).
data.head()
image-20240129174852256
# Palette: three grey levels (given as strings) plus the two accent colours.
GREY96, GREY30, GREY15 = "0.96", "0.3", "0.15"
BLUE, ORANGE = "#3A8AD8", "#F2A039"

# Per-species arrays: Latin name and maximum age for each sex.
names, age_max_F, age_max_M = (
    data[column].values for column in ("latin_name", "age_max_F", "age_max_M")
)

# Blue where the female maximum age exceeds the male one, orange otherwise.
females_older = data["age_max_F"] > data["age_max_M"]
color = np.where(females_older, BLUE, ORANGE)
# Rescaling helper, used to turn lemur counts into marker sizes.
def scale(x, x_min=None, x_max=None, lower=0, upper=1):
    """Linearly map ``x`` from ``[x_min, x_max]`` onto ``[lower, upper]``.

    Parameters
    ----------
    x : scalar, array-like, list or tuple
        Value(s) to rescale. Lists and tuples are converted to NumPy arrays;
        other inputs are used as given.
    x_min, x_max : number, optional
        Bounds of the input range. Each defaults to the minimum / maximum
        of ``x`` when left as ``None``.
    lower, upper : number, optional
        Bounds of the output range (default ``0`` and ``1``).

    Returns
    -------
    Same kind of object as ``x - x_min``
        The rescaled value(s). When ``x_max == x_min`` every value maps to
        ``lower`` instead of dividing by zero.
    """
    # Plain Python sequences do not support ``sequence - scalar``.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if x_min is None:
        x_min = np.min(x)
    if x_max is None:
        x_max = np.max(x)

    span = x_max - x_min
    if span == 0:
        # Zero-width input range: the ratio is undefined, so use 0 for every
        # element. Multiplying by 0.0 keeps the input's type and shape.
        fraction = (x - x_min) * 0.0
    else:
        fraction = (x - x_min) / span
    return fraction * (upper - lower) + lower
# Marker sizes: counts of both sexes are rescaled against one shared
# minimum / maximum so female and male dots are directly comparable.
lower, upper = 10, 100
counts_F = data["count_F"]
counts_M = data["count_M"]
n_lemurs_min = min(np.min(counts_F), np.min(counts_M))
n_lemurs_max = max(np.max(counts_F), np.max(counts_M))
size_F, size_M = (
    scale(counts.values, n_lemurs_min, n_lemurs_max, lower, upper)
    for counts in (counts_F, counts_M)
)
# Hand-tuned vertical offsets for the species labels, keyed by Latin name
# (the original note says adjustText did not give good results).
# NOTE(review): keys such as "Mircocebus murinus" look misspelled but are
# presumably copied from the data set's `latin_name` values -- do not
# "correct" them without checking the data.
_LABEL_OFFSETS = (
    ("Varecia rubra", 0),
    ("Daubentonia madagascariensis", 0.6),
    ("Eulemur macaco", 0.6),
    ("Eulemur rubriventer", 0.4),
    ("Eulemur sanfordi", 0.5),
    ("Eulemur collaris", -0.1),
    ("Perodicticus potto", -0.6),
    ("Lemur catta", 0),
    ("Eulemur flavifrons", 0),
    ("Nycticebus coucang", 0),
    ("Otolemur garnettii garnettii", 0),
    ("Mircocebus murinus", 0),
    ("Galago moholi", 0),
    ("Varecia variegata variegata", 0),
    ("Eulemur mongoz", 0),
    ("Eulemur fulvus", 0),
    ("Eulemur rufus", 0),
    ("Propithecus coquereli", 0.3),
    ("Cheirogaleus medius", 0.3),
    ("Eulemur Eulemur", -0.1),
    ("Eulemur coronatus", -0.35),
    ("Eulemur albifrons", -0.25),
    ("Hapalemur griseus griseus", 0),
    ("Loris tardigradus", 0),
    ("Mirza coquereli", 0),
    ("Varecia Varecia", 0),
    ("Nycticebus pygmaeus", 0),
)
vertical_adjustments = dict(_LABEL_OFFSETS)
# Create the figure with a single axes and set the outer margins.
fig, ax = plt.subplots(figsize=(9, 11))
fig.subplots_adjust(left=0.05, right=0.90, top=0.9, bottom=0.075)
# Give figure and axes the same light-grey background.
for surface in (fig, ax):
    surface.set_facecolor(GREY96)

# One slope line per species from x=1 (female) to x=2 (male), with a dot at
# each end whose area encodes the lemur count. zorder=10 keeps the dots on
# top of the lines.
for age_f, age_m, shade, area_f, area_m in zip(
    age_max_F, age_max_M, color, size_F, size_M
):
    ax.plot([1, 2], [age_f, age_m], c=shade, lw=1)
    for x_pos, age, area in ((1, age_f, area_f), (2, age_m, area_m)):
        ax.scatter(x_pos, age, c=shade, s=area, zorder=10)
output_14_0
# Horizontal gap between a dot and its text label.
TEXT_HPADDING = 0.08
# Horizontal gap between a dot and the start of its connector line.
LINE_HPADDING1 = 0.02
# Horizontal gap between a dot and the label end of its connector line.
LINE_HPADDING2 = 0.07

# Label every species, alternating sides so neighbouring names do not pile
# up on one axis: odd positions go to the left of the female axis (x=1),
# even positions to the right of the male axis (x=2).
for i, name in enumerate(names):
    # Manual vertical nudge for this label. Species missing from the
    # hand-written table get no nudge instead of raising KeyError.
    shift = vertical_adjustments.get(name, 0)
    if i % 2 != 0:
        # Odd index: right-aligned label on the left, anchored to the
        # female value.
        point_y = age_max_F[i]
        label_x = 1 - TEXT_HPADDING
        align = "right"
        line_x = [1 - LINE_HPADDING2, 1 - LINE_HPADDING1]
        line_y = [point_y + shift, point_y]
    else:
        # Even index: left-aligned label on the right, anchored to the
        # male value ("left" is matplotlib's default alignment).
        point_y = age_max_M[i]
        label_x = 2 + TEXT_HPADDING
        align = "left"
        line_x = [2 + LINE_HPADDING1, 2 + LINE_HPADDING2]
        line_y = [point_y, point_y + shift]
    ax.text(
        label_x, point_y + shift, name, size=11, name="Faune",
        color=GREY15, ha=align, va="center"
    )
    # Thin connector between the (possibly shifted) label and its dot.
    ax.plot(line_x, line_y, color=GREY15, lw=0.5)
# Notebook display of the figure (no effect when run as a script).
fig
output_16_0
# Strip the frame and both tick sets, then fix the horizontal extent.
ax.set(frame_on=False, xticks=[], yticks=[], xlim=(0.5, 2.5))

# Faint horizontal guide lines at every integer age from 10 to 40, drawn
# behind everything else (zorder=0).
ax.hlines(
    y=np.arange(10, 41), xmin=1.1, xmax=1.9,
    alpha=0.2, lw=0.5, color=GREY15, zorder=0
)

# Age markers every five years in the middle of the chart: a
# background-coloured disc with the number printed on top of it.
for age_mark in range(10, 45, 5):
    ax.scatter(1.5, age_mark, s=1200, color=GREY96)
    ax.text(
        1.5, age_mark, str(age_mark),
        size=22, name="Faune", color="darkgreen", weight="bold",
        alpha=0.2, va="center", ha="center"
    )

# Column headings for the two axes, sharing one text style.
heading_style = dict(
    name="Faune", size=27, color="darkgreen", weight="bold", alpha=0.3
)
ax.text(0.9, 11.25, "Female", ha="right", **heading_style)
ax.text(2.1, 11.25, "Male", ha="left", **heading_style)
# Notebook display of the figure (no effect when run as a script).
fig
# Counts shown in the size legend, rescaled with the same bounds as the
# plotted dots so the legend markers are comparable to them.
labels = np.array([20, 40, 60, 80, 100])
sizes = scale(labels, n_lemurs_min, n_lemurs_max, lower, upper)

# First entry: an invisible handle that only carries the legend caption.
caption_handle = Line2D(
    [], [], label="Total number of lemurs hosted at DLC by species and sex", lw=0
)
# Remaining entries: one black dot per count. Scatter sizes are areas while
# Line2D marker sizes are lengths, hence the square root.
dot_handles = [
    Line2D(
        [], [],
        label=str(count),
        lw=0,
        color="None",
        marker="o",
        markersize=np.sqrt(area),
        markerfacecolor="black"
    )
    for count, area in zip(labels, sizes)
]
handles = [caption_handle, *dot_handles]

# Frameless legend laid out as a single row of six entries, centred
# horizontally near the bottom of the figure.
legend = fig.legend(
    handles=handles,
    loc="center",
    bbox_to_anchor=[0.5, 0.05],
    ncol=6,
    frameon=False,
    handlelength=1,
    handletextpad=0.5,
    columnspacing=1
)
# Style the legend texts; the caption (first entry) is slightly larger.
for position, entry in enumerate(legend.get_texts()):
    entry.set_fontfamily("Faune")
    entry.set_color("darkgreen")
    entry.set_fontsize(11 if position == 0 else 9)
# Notebook display of the figure (no effect when run as a script).
fig
output_20_0
# Figure-level captions, each horizontally centred at x=0.5:
# title, subtitle and source/credit line (from top to bottom).
for y_pos, message, look in (
    (
        0.94,
        "Longest lived lemurs at the Duke Lemur Center",
        dict(size=28, name="Faune", weight="bold", color="darkgreen"),
    ),
    (
        0.905,
        "Maximum age recorded in years (living or dead), by species and sex",
        dict(size=18, name="Faune", color=GREY30),
    ),
    (
        0.025,
        "Source: Duke Lemur Center · Graphic: Georgios Karamanis",
        dict(size=10, color=GREY30, family="Faune"),
    ),
):
    fig.text(0.5, y_pos, message, ha="center", **look)
# Notebook display of the figure (no effect when run as a script).
fig
output_22_0
参考:Parallel coordinate chart with Python and Matplotlib[1]
共勉~
[1] Parallel coordinate chart with Python and Matplotlib: https://python-graph-gallery.com/web-lemurs-parallel-chart/