首页
学习
活动
专区
圈层
工具
发布
社区首页 >专栏 >利用圆形棒棒糖图探索密码强度与流行度

利用圆形棒棒糖图探索密码强度与流行度

作者头像
HsuHeinrich
发布2025-10-20 18:27:56
发布2025-10-20 18:27:56
1140
举报
文章被收录于专栏:HsuHeinrichHsuHeinrich

利用圆形棒棒糖图探索密码强度与流行度

代码语言:javascript
复制
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

数据探索

以下数据如果有需要的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~

代码语言:javascript
复制
# Load the TidyTuesday 2020-01-14 passwords table straight from GitHub.
df_pw = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv')
# Drop rows that have no password, then renumber the rows 0..n-1.
# Later cells use df_pw's index labels as positions into NumPy arrays built
# from this frame (ANGLES / HEIGHTS / COLORS); that is only valid when the
# index has no gaps, so it is made gap-free explicitly here.
df_pw = df_pw.dropna(subset=['password']).reset_index(drop=True)
# Notebook preview of the first rows.
df_pw.head()
image-20240129180657919
image-20240129180657919

image-20240129180657919

字段说明:rank:在已发布密码库中的排名;password:密码的文本;category:密码所属的类别;value:通过在线猜测破解所需的时间;time_unit:与value匹配的时间单位。

代码语言:javascript
复制
# Multiplier applied to `value` for each supported `time_unit`.
# NOTE(review): "minutes" and "hours" are scaled to seconds, while "days" and
# longer units are scaled to minutes (x * 60 * 24 ...). The reference rings
# drawn later (n_days * 24 * 60) and the hard-coded label heights rely on
# that minute-based scale, so it is kept as is — confirm before unifying.
_UNIT_FACTORS = {
    "seconds": 1,
    "minutes": 60,
    "hours": 60 * 60,
    "days": 60 * 24,  # was 60 * 27: a typo, out of step with the units below
    "weeks": 60 * 24 * 7,
    "months": 60 * 24 * 30,
    "years": 60 * 24 * 365,
}


def to_seconds(value, time_unit):
    """Convert a duration to the common numeric scale used for plotting.

    Args:
        value: Numeric amount of time, expressed in `time_unit`.
        time_unit: One of "seconds", "minutes", "hours", "days", "weeks",
            "months" or "years".

    Returns:
        `value` multiplied by the factor registered for `time_unit`, or
        `np.nan` when the unit is not recognised.
    """
    factor = _UNIT_FACTORS.get(time_unit)
    if factor is None:
        return np.nan
    return value * factor
代码语言:javascript
复制
# Crack time of every password on the common numeric scale, in row order
TIMES = [
    to_seconds(amount, unit)
    for amount, unit in zip(df_pw["value"], df_pw["time_unit"])
]
代码语言:javascript
复制
# Range of the crack times, ignoring NaN entries: to_seconds yields NaN for
# an unrecognised time unit, and with plain max/min a single NaN would turn
# both bounds (and therefore every scaled marker size) into NaN.
TIME_MAX = np.nanmax(TIMES)
TIME_MIN = np.nanmin(TIMES)


def scale_to_interval(x, low=1, high=60):
    """Linearly rescale `x` from [TIME_MIN, TIME_MAX] to [low, high].

    Args:
        x: Value(s) on the same scale as TIMES; the plotting code passes a
            NumPy array of heights.
        low: Output produced for x == TIME_MIN.
        high: Output produced for x == TIME_MAX.

    Returns:
        The rescaled value(s). The module-level TIME_MIN / TIME_MAX are used
        on every call, regardless of which subset of heights is passed in.
    """
    return ((x - TIME_MIN) / (TIME_MAX - TIME_MIN)) * (high - low) + low

绘制基本圆形图

代码语言:javascript
复制
# Shared plot settings

# Greys for the four reference rings, lightest first
GREY88, GREY85, GREY82, GREY79 = "#e0e0e0", "#d9d9d9", "#d1d1d1", "#c9c9c9"
# Greys for the centre fill and the annotation text
GREY97, GREY60 = "#f7f7f7", "#999999"

# Radial values: one height per password
HEIGHTS = np.array(TIMES)

# Angular values: one evenly spaced angle per password around the circle
ANGLES = np.linspace(0, 2 * np.pi, num=len(TIMES), endpoint=False)

# Integer code of each row's category
CATEGORY_CODES = pd.Categorical(df_pw["category"]).codes

# Custom palette, indexed by category code
COLORMAP = [
    "#5F4690", "#1D6996", "#38A6A5", "#0F8554", "#73AF48",
    "#EDAD08", "#E17C05", "#CC503E", "#94346E", "#666666",
]

# Colour of each password, looked up through its category code
_PALETTE = np.array(COLORMAP)
COLORS = _PALETTE[CATEGORY_CODES]


# Constant radial offset: moves the start of every bar away from the centre
PLUS = 1000
代码语言:javascript
复制
# Passwords singled out for annotation: rows whose raw `value` exceeds 90
# (the original author notes that this selects 4 passwords).
# reset_index() keeps the former row label in an "index" column.
LABELS_DF = df_pw.loc[df_pw["value"] > 90].reset_index()

# Annotation text: the password, then its rank on a second line
LABELS_DF["label"] = (
    LABELS_DF["password"].astype(str)
    + "\nRank: "
    + LABELS_DF["rank"].astype(int).astype(str)
)

# Hand-tuned anchor of each annotation, one entry per selected row.
# x: position into ANGLES, shifted so that the text label has room.
# y: radial position; the radial axis is log-like, so values such as
#    48498112 and 160000000 end up close together on screen.
LABELS_DF = LABELS_DF.assign(
    x=[40, 332, 401, 496],
    y=[160000000, 90000000, 45000000, 48498112],
)
代码语言:javascript
复制
# Create the figure with a single polar axes
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"projection": "polar"})

# White background for both the figure and the axes
fig.patch.set_facecolor("white")
ax.set_facecolor("white")

# Use a symmetric-log scale on the radial axis
ax.set_rscale('symlog')

# Rotate the origin of the angular axis by 90 degrees (angle 0 at the top)
ax.set_theta_offset(np.pi / 2)

# Direction -1: angles increase clockwise
ax.set_theta_direction(-1)

# Stems: one radial line per password, from the PLUS offset to its height
ax.vlines(ANGLES, 0 + PLUS, HEIGHTS + PLUS, color=COLORS, lw=0.9)

# Heads: one dot at the tip of each stem, sized by the rescaled height
# (the trailing ';' is presumably there to silence the notebook's echo)
ax.scatter(ANGLES, HEIGHTS + PLUS, s=scale_to_interval(HEIGHTS), color=COLORS);
output_12_0
output_12_0

output_12_0

删除不必要的元素

代码语言:javascript
复制
# Hide both spines of the polar axes
for spine_name in ("start", "polar"):
    ax.spines[spine_name].set_color("none")

# Drop the default grid, the angular ticks and the radial tick labels
ax.grid(False)
ax.set_xticks([])
ax.set_yticklabels([])

# Custom reference rings for one day, one week, one month and one year.
# Radii are n_days * 24 * 60, the scale to_seconds uses for weeks, months
# and years, shifted outwards by PLUS like the bars.
HANGLES = np.linspace(0, 2 * np.pi, 200)
for n_days, ring_color in zip((1, 7, 30, 365), (GREY88, GREY85, GREY82, GREY79)):
    ax.plot(HANGLES, np.repeat(n_days * 24 * 60 + PLUS, 200), color=ring_color, lw=0.7)

# Annotate the selected passwords, each in the colour of its own bar
for _row_label, row in LABELS_DF.iterrows():
    ax.text(
        x=ANGLES[row["x"]],
        y=row["y"],
        s=row["label"],
        color=COLORS[row["index"]],
        ha="right",
        va="center",
        ma="center",
        size=8,
        family="Roboto Mono",
        weight="bold",
    )

fig
output_14_0
output_14_0

output_14_0

丰富信息

代码语言:javascript
复制
# Grey fill of the region enclosed by the circle of radius PLUS
ax.fill(HANGLES, np.repeat(PLUS, 200), GREY97)

# Centre annotations.
# transform=ax.transAxes positions the text in axes-relative coordinates,
# where (0.5, 0.5) is the middle of the axes.
CENTER_TEXT_STYLE = dict(
    color=GREY60, va="center", ha="center", ma="center",
    fontfamily="Roboto Mono", linespacing=0.87, transform=ax.transAxes,
)

# Title
ax.text(
    x=0.5, y=0.58,
    s="********\nCracking\nYour Favorite\nPassword\n********",
    fontsize=18, fontweight="bold", **CENTER_TEXT_STYLE
)

# What the chart shows
ax.text(
    x=0.5, y=0.46,
    s=(
        "Time it takes to crack the 500 most\n"
        "common passwords by online guessing.\n"
        "Sorted by rank and colored by category."
    ),
    fontsize=7, **CENTER_TEXT_STYLE
)

# How to read the rings
ax.text(
    x=0.5, y=0.39,
    s=(
        "Time is displayed on a logarithmic scale\n"
        "with the rings representing one day,\n"
        "one week, one month, and one year\n"
        "(from inner to outer ring)."
    ),
    fontsize=7, **CENTER_TEXT_STYLE
)

fig

绘制多个子图

为每个类别的密码单独绘制圆形棒棒糖图

代码语言:javascript
复制
# The steps above, wrapped into a reusable function
def circular_plot(angles, heights, colors, lw, ax):
    """Draw one circular lollipop chart on `ax`.

    Args:
        angles: Angular position of each lollipop.
        heights: Radial value of each lollipop; PLUS is added before drawing.
        colors: Colour of each lollipop.
        lw: Line width of the stems.
        ax: Axes to draw on; it must support the polar-axes calls used here
            (set_theta_offset, set_rmax, the "polar" spine).

    Returns:
        None. The axes is modified in place.
    """
    # Axes appearance: white face, symlog radius, rotated origin, direction -1
    ax.set_facecolor("white")
    ax.set_rscale("symlog")
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    # No spines, grid, angular ticks or radial tick labels
    for spine_name in ("start", "polar"):
        ax.spines[spine_name].set_color("none")
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticklabels([])

    # Stems (width set by lw) and heads of the lollipops
    tips = heights + PLUS
    ax.vlines(angles, 0 + PLUS, tips, color=colors, lw=lw)
    ax.scatter(angles, tips, s=scale_to_interval(heights), color=colors)

    # Reference rings at 1, 7, 30 and 365 days (n_days * 24 * 60 + PLUS)
    ring_angles = np.linspace(0, 2 * np.pi, 200)
    for n_days, ring_color in zip((1, 7, 30, 365), (GREY88, GREY85, GREY82, GREY79)):
        ax.plot(ring_angles, np.repeat(n_days * 24 * 60 + PLUS, 200), color=ring_color, lw=0.7)

    # Grey fill of the region enclosed by the circle of radius PLUS
    ax.fill(ring_angles, np.repeat(PLUS, 200), GREY97)

    # Double the radial limit so the larger dots get more room
    ax.set_rmax(ax.get_rmax() * 2)
代码语言:javascript
复制
# Display labels for the longer category names, broken over several lines
_WRAPPED_CATEGORY_LABELS = {
    "cool-macho": "cool-\nmacho",
    "nerdy-pop": "nerdy-\npop",
    "password-related": "password-\nrelated",
    "rebellious-rude": "rebel-\nlious-\nrude",
    "simple-alphanumeric": "simple-\nalpha-\nnumeric",
}


def map_category(category):
    """Return the display label for `category`.

    Long category names get a version with embedded newlines; any other
    value is returned unchanged.
    """
    return _WRAPPED_CATEGORY_LABELS.get(category, category)

# Category names in sorted order, and the display label of each
CATEGORIES = sorted(pd.Categorical(df_pw["category"]).unique())
LABELS = list(map(map_category, CATEGORIES))
代码语言:javascript
复制
# Small multiples: a 2 x 5 grid of polar axes
fig, axes = plt.subplots(2, 5, figsize=(15, 6), subplot_kw={"projection": "polar"})
fig.patch.set_facecolor("white")

# (row, column) of every grid cell, row by row
SLICES = [divmod(cell, 5) for cell in range(2 * 5)]

for category, label, slice_ in zip(CATEGORIES, LABELS, SLICES):
    # Axes assigned to this category
    ax = axes[slice_]

    # Index labels of this category's rows, used as positions into
    # ANGLES / HEIGHTS / COLORS
    idx = df_pw.index[df_pw["category"] == category].tolist()
    colors = COLORS[idx]

    # Lollipop chart restricted to this category
    circular_plot(ANGLES[idx], HEIGHTS[idx], colors, 0.8, ax)

    # Category label at the centre of the axes, in the category's colour
    ax.text(
        x=0.5,
        y=0.5,
        s=label,
        color=colors[0],
        va="center",
        ha="center",
        ma="center",
        fontfamily="Roboto Mono",
        fontsize=14,
        fontweight="bold",
        linespacing=0.87,
        transform=ax.transAxes,
    )

# Remove the gaps between the subplots
fig.subplots_adjust(wspace=0, hspace=0)
output_21_0
output_21_0

output_21_0

多图汇总

代码语言:javascript
复制
# Layout for the combined figure: a 6 x 5 grid of polar axes
fig, axes = plt.subplots(nrows=6, ncols=5, figsize=(15, 17.5), subplot_kw={"projection": "polar"})
fig.patch.set_facecolor("white")
# GridSpec of the grid; it describes the position and size of the cells
gs = axes[0, 0].get_gridspec()

# Remove the 20 axes in the first four rows
for small_ax in axes[:4].flat:
    small_ax.remove()

# Put one large polar axes where they were: first four rows, all columns
ax_big = fig.add_subplot(gs[:4, :], projection="polar")

# Aspect ratio of 1 for the large axes
ax_big.set_aspect("equal")

# Figure margins, and no spacing between the subplots
fig.subplots_adjust(
    left=0.04, bottom=0.05, right=0.96, top=0.95,
    wspace=0, hspace=0,
)
output_23_0
output_23_0

output_23_0

代码语言:javascript
复制
# Draw the full chart on the large axes of the layout
circular_plot(ANGLES, HEIGHTS, COLORS, 1.4, ax_big)

# Annotate the selected passwords, each in the colour of its own bar
for _row_label, row in LABELS_DF.iterrows():
    ax_big.text(
        ANGLES[row["x"]], row["y"], row["label"],
        color=COLORS[row["index"]],
        ha="right", va="center", ma="center",
        size=13, family="Roboto Mono", weight="bold",
    )

# Centre annotations, positioned in axes-relative coordinates
BIG_TEXT_STYLE = dict(
    color=GREY60, va="center", ha="center", ma="center",
    fontfamily="Roboto Mono", transform=ax_big.transAxes,
)

# Title
ax_big.text(
    x=0.5, y=0.56,
    s="********\nCracking\nYour Favorite\nPassword\n********",
    fontsize=30, fontweight="bold", linespacing=0.95, **BIG_TEXT_STYLE
)

# What the chart shows
ax_big.text(
    x=0.5, y=0.44,
    s=(
        "Time it takes to crack the 500 most\n"
        "common passwords by online guessing.\n"
        "Sorted by rank and colored by category."
    ),
    fontsize=12, linespacing=0.87, **BIG_TEXT_STYLE
)

# How to read the rings
ax_big.text(
    x=0.5, y=0.37,
    s=(
        "Time is displayed on a logarithmic scale\n"
        "with the rings representing one day,\n"
        "one week, one month, and one year\n"
        "(from inner to outer ring)."
    ),
    fontsize=11, linespacing=0.87, **BIG_TEXT_STYLE
)
fig
代码语言:javascript
复制
# Finally, draw the per-category charts in the two bottom rows of the layout
slices = [(grid_row, grid_col) for grid_row in (4, 5) for grid_col in range(5)]

for category, label, idx_ax in zip(CATEGORIES, LABELS, slices):
    ax = axes[idx_ax]

    # Index labels of this category's rows, used as positions into
    # ANGLES / HEIGHTS / COLORS
    idx = df_pw.index[df_pw["category"] == category].tolist()
    colors = COLORS[idx]

    circular_plot(ANGLES[idx], HEIGHTS[idx], colors, 0.9, ax)

    # Category label at the centre of the axes, in the category's colour
    ax.text(
        x=0.5,
        y=0.5,
        s=label,
        color=colors[0],
        va="center",
        ha="center",
        ma="center",
        fontfamily="Roboto Mono",
        fontsize=14,
        fontweight="bold",
        linespacing=0.87,
        transform=ax.transAxes,
    )

# Credit line at the bottom of the figure
fig.text(
    x=0.5,
    y=0.02,
    s="Visualization by Cédric Scherer - Data by Knowledge is Beautiful",
    color=GREY60,
    fontsize=14,
    ha="center",
    fontfamily="Roboto Mono",
    fontweight="bold",
)

fig

参考:Circular lollipop plot with Matplotlib[1]

共勉~

参考资料

[1]

Circular lollipop plot with Matplotlib: https://python-graph-gallery.com/web-circular-lollipop-plot-with-matplotlib/

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2025-09-22,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 HsuHeinrich 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 利用圆形棒棒糖图探索密码强度与流行度
    • 数据探索
    • 绘制基本圆形图
    • 删除不必要的元素
    • 丰富信息
    • 绘制多个子图
    • 多图汇总
      • 参考资料
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档