import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
以下数据如果有需要的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~
# Load the TidyTuesday passwords table and discard rows without a password.
# The index is rebuilt after dropping rows: later steps take index labels of
# df_pw and use them as positions into NumPy arrays built from it (ANGLES,
# HEIGHTS, COLORS), which is only valid when labels run 0..len(df_pw)-1.
df_pw = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv')
df_pw = df_pw.dropna(subset=['password']).reset_index(drop=True)
df_pw.head()

image-20240129180657919
字段说明:rank:在已发布密码库中的排名;password:密码的文本;category:密码所属的类别;value:通过在线猜测破解所需的时间;time_unit:与value匹配的时间单位。
# Multiplier applied to a value expressed in each supported time unit.
# NOTE(review): "seconds", "minutes" and "hours" are true second factors, but
# the factors from "days" upwards are built on 60 * 24 (minutes per day), so
# those results are effectively minutes. They are kept on that scale because
# the reference rings in this script are drawn at 24 * 60, 7 * 24 * 60, ... and
# the hard-coded label radii use the same scale - confirm before changing.
_UNIT_FACTORS = {
    "seconds": 1,
    "minutes": 60,
    "hours": 60 * 60,
    "days": 60 * 24,  # was 60 * 27, which made 7 days differ from 1 week
    "weeks": 60 * 24 * 7,
    "months": 60 * 24 * 30,
    "years": 60 * 24 * 365,
}


def to_seconds(value, time_unit):
    """Convert a duration to the common numeric scale used for plotting.

    Parameters
    ----------
    value : number
        Duration expressed in ``time_unit``.
    time_unit : str
        One of "seconds", "minutes", "hours", "days", "weeks", "months",
        "years".

    Returns
    -------
    number
        ``value`` multiplied by the factor of ``time_unit`` (nominally
        seconds, see the note on ``_UNIT_FACTORS``), or ``np.nan`` when the
        unit is not recognised.
    """
    factor = _UNIT_FACTORS.get(time_unit)
    if factor is None:
        return np.nan
    return value * factor
# Cracking time of every password after conversion with to_seconds,
# one entry per row of df_pw and in the same order
TIMES = [
    to_seconds(amount, unit)
    for amount, unit in zip(df_pw["value"], df_pw["time_unit"])
]
# Smallest and largest converted time; scale_to_interval normalises against them
TIME_MIN = np.min(TIMES)
TIME_MAX = np.max(TIMES)
# Time rescaling helper
def scale_to_interval(x, low=1, high=60):
    """Map ``x`` linearly from [TIME_MIN, TIME_MAX] onto [low, high].

    ``x`` may be a scalar or a NumPy array; TIME_MIN and TIME_MAX are the
    module-level extremes of TIMES. The result is passed as the marker size
    of the scatter points.
    """
    fraction = (x - TIME_MIN) / (TIME_MAX - TIME_MIN)
    return fraction * (high - low) + low
# Basic plot settings
# Several shades of grey
GREY88 = "#e0e0e0"
GREY85 = "#d9d9d9"
GREY82 = "#d1d1d1"
GREY79 = "#c9c9c9"
GREY97 = "#f7f7f7"
GREY60 = "#999999"
# Angular position of each password: evenly spaced around the full circle
ANGLES = np.linspace(0, 2 * np.pi, num=len(TIMES), endpoint=False)
# Length of each lollipop stem
HEIGHTS = np.array(TIMES)
# Integer code of each password's category
CATEGORY_CODES = pd.Categorical(df_pw["category"]).codes
# Custom colour list, one entry per category code
COLORMAP = [
    "#5F4690", "#1D6996", "#38A6A5", "#0F8554", "#73AF48",
    "#EDAD08", "#E17C05", "#CC503E", "#94346E", "#666666",
]
# Colour of each password, looked up through its category code
COLORS = np.array(COLORMAP)[CATEGORY_CODES]
# Constant radial offset between the axis centre and the start of the stems
PLUS = 1000
# Passwords that get a text label: rows with value > 90
# (the original author notes that this selects 4 passwords)
LABELS_DF = df_pw[df_pw["value"] > 90].reset_index()
# Label text: the password followed by its rank on a second line
label_texts = []
for pw_text, pw_rank in zip(LABELS_DF["password"], LABELS_DF["rank"]):
    label_texts.append(f"{pw_text}\nRank: {int(pw_rank)}")
LABELS_DF["label"] = label_texts
# Label positions.
# x is an index into ANGLES, shifted from the password's own index to leave
# room for the text
LABELS_DF["x"] = [40, 332, 401, 496]
# y is a radius on the HEIGHTS scale; because the radial axis is log-scaled,
# 48498112 and 160000000 end up almost at the same distance
LABELS_DF["y"] = [160000000, 90000000, 45000000, 48498112]
# Create the figure with a single polar axes
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection="polar"))
# White background for the figure and for the axes
fig.patch.set_facecolor("white")
ax.set_facecolor("white")
# Symmetric-log scale on the radial axis
ax.set_rscale("symlog")
# Put angle zero at 90 degrees
ax.set_theta_offset(np.pi / 2)
# Direction -1: angles increase clockwise
ax.set_theta_direction(-1)
# Stems: one radial line per password, starting at the PLUS offset
stem_tips = HEIGHTS + PLUS
ax.vlines(ANGLES, PLUS, stem_tips, color=COLORS, lw=0.9)
# Heads: one dot at the end of each stem, sized by the rescaled time
ax.scatter(ANGLES, stem_tips, s=scale_to_interval(HEIGHTS), color=COLORS);

output_12_0
# Hide the frame of the polar axes
for spine_name in ("start", "polar"):
    ax.spines[spine_name].set_color("none")
# Drop the default grid, the angular ticks and the radial tick labels
ax.grid(False)
ax.set_xticks([])
ax.set_yticklabels([])
# Custom reference rings for one day, one week, one month and one year
HANGLES = np.linspace(0, 2 * np.pi, 200)
ring_specs = ((1, GREY88), (7, GREY85), (30, GREY82), (365, GREY79))
for n_days, ring_color in ring_specs:
    ring_radius = np.repeat(n_days * 24 * 60 + PLUS, 200)
    ax.plot(HANGLES, ring_radius, color=ring_color, lw=0.7)
# Text labels for the selected passwords, coloured like their lollipop
for idx, row in LABELS_DF.iterrows():
    color = COLORS[row["index"]]
    label_angle = ANGLES[row["x"]]
    ax.text(
        x=label_angle,
        y=row["y"],
        s=row["label"],
        color=color,
        ha="right",
        va="center",
        ma="center",
        size=8,
        family="Roboto Mono",
        weight="bold",
    )
fig

output_14_0
# Grey disc covering the area inside the PLUS offset
ax.fill(HANGLES, np.repeat(PLUS, 200), GREY97)
# Title and description.
# transform=ax.transAxes places text in axes coordinates, where (0.5, 0.5)
# is the centre of the axes
centered_style = dict(
    color=GREY60,
    va="center",
    ha="center",
    ma="center",
    fontfamily="Roboto Mono",
    linespacing=0.87,
    transform=ax.transAxes,
)
ax.text(
    x=0.5,
    y=0.58,
    s="********\nCracking\nYour Favorite\nPassword\n********",
    fontsize=18,
    fontweight="bold",
    **centered_style,
)
ax.text(
    x=0.5,
    y=0.46,
    s="Time it takes to crack the 500 most\ncommon passwords by online guessing.\nSorted by rank and colored by category.",
    fontsize=7,
    **centered_style,
)
ax.text(
    x=0.5,
    y=0.39,
    s="Time is displayed on a logarithmic scale\nwith the rings representing one day,\none week, one month, and one year\n(from inner to outer ring).",
    fontsize=7,
    **centered_style,
)
fig

为每个类别的密码单独绘制圆形棒棒糖图
# The steps above, wrapped into the function circular_plot
def circular_plot(angles, heights, colors, lw, ax):
    """Draw one circular lollipop chart on the polar axes ``ax``.

    Parameters
    ----------
    angles : array of angular positions, one per lollipop
    heights : array of stem lengths, same length as ``angles``
    colors : colours for the stems and dots, same length as ``angles``
    lw : line width of the stems
    ax : polar axes to draw on; it is modified in place

    Nothing is returned.
    """
    # Axes set-up: white background, symlog radius, zero angle at 90 degrees,
    # angles increasing clockwise
    ax.set_facecolor("white")
    ax.set_rscale("symlog")
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    # Hide frame, default grid, angular ticks and radial tick labels
    for spine_name in ("start", "polar"):
        ax.spines[spine_name].set_color("none")
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticklabels([])

    # Lollipops: stems start at the PLUS offset, dots sit on the stem ends
    stem_tips = heights + PLUS
    ax.vlines(angles, PLUS, stem_tips, color=colors, lw=lw)
    ax.scatter(angles, stem_tips, s=scale_to_interval(heights), color=colors)

    # Reference rings for one day, week, month and year, then the grey centre
    circle = np.linspace(0, 2 * np.pi, 200)
    for n_days, ring_color in ((1, GREY88), (7, GREY85), (30, GREY82), (365, GREY79)):
        ax.plot(circle, np.repeat(n_days * 24 * 60 + PLUS, 200), color=ring_color, lw=0.7)
    ax.fill(circle, np.repeat(PLUS, 200), GREY97)

    # Double the radial limit so that the larger dots have room
    ax.set_rmax(ax.get_rmax() * 2)
# Helper map_category: break long category names over several lines
def map_category(category):
    """Return the display label for ``category``.

    Five long category names are returned with line breaks inserted; any
    other value is returned unchanged.
    """
    wrapped_names = {
        "cool-macho": "cool-\nmacho",
        "nerdy-pop": "nerdy-\npop",
        "password-related": "password-\nrelated",
        "rebellious-rude": "rebel-\nlious-\nrude",
        "simple-alphanumeric": "simple-\nalpha-\nnumeric",
    }
    return wrapped_names.get(category, category)
# Category names in sorted order and their display labels
CATEGORIES = sorted(pd.Categorical(df_pw["category"]).unique())
LABELS = list(map(map_category, CATEGORIES))
# Grid of polar subplots: 2 rows by 5 columns
fig, axes = plt.subplots(
    nrows=2, ncols=5, figsize=(15, 6), subplot_kw=dict(projection="polar")
)
fig.patch.set_facecolor("white")
# (row, column) position of each subplot, row by row
SLICES = [divmod(cell, 5) for cell in range(2 * 5)]
for category, label, slice_ in zip(CATEGORIES, LABELS, SLICES):
    # Axes that receives this category
    ax = axes[slice_]
    # Index values of the rows that belong to this category (as a list)
    idx = df_pw.index[df_pw["category"] == category].tolist()
    # Angles, heights and colours of this category only
    angles, heights, colors = ANGLES[idx], HEIGHTS[idx], COLORS[idx]
    # Draw the lollipops
    circular_plot(angles, heights, colors, 0.8, ax)
    # Category name in the centre, in the category's colour
    ax.text(
        x=0.5,
        y=0.5,
        s=label,
        color=colors[0],
        va="center",
        ha="center",
        ma="center",
        fontfamily="Roboto Mono",
        fontsize=14,
        fontweight="bold",
        linespacing=0.87,
        transform=ax.transAxes,
    )
# Remove the spacing between subplots
fig.subplots_adjust(wspace=0, hspace=0)

output_21_0
# First set up the combined layout: a 6 x 5 grid of polar axes
fig, axes = plt.subplots(
    nrows=6, ncols=5, figsize=(15, 17.5), subplot_kw=dict(projection="polar")
)
fig.patch.set_facecolor("white")
# Grid specification of the figure, used to place the large axes below
gs = axes[0, 0].get_gridspec()
# Remove the 20 axes in the first four rows
for small_ax in axes[:4, :].ravel():
    small_ax.remove()
# Put one large polar axes in the freed area: first four rows, all columns
ax_big = fig.add_subplot(gs[:4, :], projection="polar")
# Aspect ratio "equal" for the large axes
ax_big.set_aspect("equal")
# Figure margins and no spacing between subplots
fig.subplots_adjust(
    left=0.04,
    bottom=0.05,
    right=0.96,
    top=0.95,
    wspace=0,
    hspace=0,
)

output_23_0
# Then draw the full chart on the large axes of the layout
circular_plot(ANGLES, HEIGHTS, COLORS, 1.4, ax_big)
# Labels for the selected passwords, coloured like their lollipop
for idx, row in LABELS_DF.iterrows():
    color = COLORS[row["index"]]
    label_angle = ANGLES[row["x"]]
    ax_big.text(
        label_angle,
        row["y"],
        row["label"],
        ha="right",
        va="center",
        ma="center",
        size=13,
        family="Roboto Mono",
        weight="bold",
        color=color,
    )
# Title and description in the centre, positioned in axes coordinates
big_text_style = dict(
    color=GREY60,
    va="center",
    ha="center",
    ma="center",
    fontfamily="Roboto Mono",
    transform=ax_big.transAxes,
)
ax_big.text(
    x=0.5,
    y=0.56,
    s="********\nCracking\nYour Favorite\nPassword\n********",
    fontsize=30,
    fontweight="bold",
    linespacing=0.95,
    **big_text_style,
)
ax_big.text(
    x=0.5,
    y=0.44,
    s="Time it takes to crack the 500 most\ncommon passwords by online guessing.\nSorted by rank and colored by category.",
    fontsize=12,
    linespacing=0.87,
    **big_text_style,
)
ax_big.text(
    x=0.5,
    y=0.37,
    s="Time is displayed on a logarithmic scale\nwith the rings representing one day,\none week, one month, and one year\n(from inner to outer ring).",
    fontsize=11,
    linespacing=0.87,
    **big_text_style,
)
fig

# Finally draw the per-category charts in the two bottom rows of the layout
# (row, column) positions of the small axes: rows 4 and 5, all five columns
slices = [(4 + cell // 5, cell % 5) for cell in range(2 * 5)]
for category, label, idx_ax in zip(CATEGORIES, LABELS, slices):
    ax = axes[idx_ax]
    # Rows of this category and their angles, heights and colours
    idx = df_pw.index[df_pw["category"] == category].tolist()
    angles, heights, colors = ANGLES[idx], HEIGHTS[idx], COLORS[idx]
    circular_plot(angles, heights, colors, 0.9, ax)
    # Category name in the centre, in the category's colour
    ax.text(
        x=0.5,
        y=0.5,
        s=label,
        color=colors[0],
        va="center",
        ha="center",
        ma="center",
        fontfamily="Roboto Mono",
        fontsize=14,
        fontweight="bold",
        linespacing=0.87,
        transform=ax.transAxes,
    )
# Credit line at the bottom of the figure
fig.text(
    x=0.5,
    y=0.02,
    s="Visualization by Cédric Scherer - Data by Knowledge is Beautiful",
    color=GREY60,
    fontsize=14,
    ha="center",
    fontfamily="Roboto Mono",
    fontweight="bold",
)
fig

参考:Circular lollipop plot with Matplotlib[1]
共勉~
[1]
Circular lollipop plot with Matplotlib: https://python-graph-gallery.com/web-circular-lollipop-plot-with-matplotlib/