树状图由一个根节点组成,根节点产生多个通过分支连接的子节点,常用于表示层次结构或展示聚类算法的结果。树状图既能清晰地呈现数据的层次结构,也便于观察指标间的「对比」。
基于dendrogram
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import numpy as np
# Load the mtcars data set from the Python Graph Gallery repository.
url = 'https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/mtcars.csv'
# Discard the 'model' column so it is not fed to the clustering; the rows are
# left with the default 0-based integer index.
df = pd.read_csv(url).drop(columns='model')
# Hierarchical clustering of the rows of df; Z holds the resulting linkage.
# Ward's variance-minimization method is used to compute the distances.
Z = linkage(df, method='ward')
# Dendrogram with leaves labelled by row index, tick labels rotated 90 degrees.
dendrogram(Z, labels=df.index, leaf_rotation=90)
# Title and axis labels are applied to the axes the dendrogram was drawn on.
ax = plt.gca()
ax.set_title('Hierarchical Clustering Dendrogram')
ax.set_xlabel('sample index')
ax.set_ylabel('distance (Ward)')
plt.show()
基于pyecharts
# 基本树图
from pyecharts import options as opts
from pyecharts.charts import Tree
# Hand-written sample hierarchy.
def _node(name, *children):
    """Return a tree-node dict; the "children" key is present only for non-leaves."""
    if not children:
        return {"name": name}
    # "children" is inserted before "name" to keep the key order of the data.
    return {"children": list(children), "name": name}


# A is the root; B, C and D are its children.
data = [
    _node(
        "A",
        _node("B"),
        _node("C", _node("E", _node("I")), _node("F")),
        _node("D", _node("G", _node("J"), _node("K")), _node("H")),
    )
]
# Build the tree chart step by step: one unnamed series fed with `data`,
# then the chart title.
c = Tree()
c.add("", data)
c.set_global_opts(title_opts=opts.TitleOpts(title="基本树图"))
# Render the chart inline in the notebook.
c.render_notebook()
自定义树状图一般是结合使用场景对相关参数进行修改,并辅以其他的绘图知识。参数信息可以通过官网进行查看,其他的绘图知识则更多来源于实战经验,大家不妨将接下来的绘图作为一种学习经验,以便于日后总结。
更多用法可参考scipy.cluster.hierarchy.dendrogram[1]
修改参数
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, set_link_color_palette
import numpy as np
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so the Chinese titles render correctly
# Load the data and use the 'model' column as the row index (leaf labels).
url = 'https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/mtcars.csv'
df = pd.read_csv(url)
df = df.set_index('model')
# Compute the linkage between samples with Ward's method.
Z = linkage(df, 'ward')
# Create the figure that holds the 3x2 grid of panels.
fig = plt.figure(figsize=(12,12))
# Panel 1: custom leaf labels.
plt.subplot(3, 2, 1)
dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=df.index)
plt.title('自定义标签')
# Panel 2: custom link colors and color threshold.
plt.subplot(3, 2, 2)
# NOTE: this changes SciPy's process-wide palette; it is restored after the
# last panel has been drawn (see below).
set_link_color_palette(['#b30000','#996600', '#b30086'])
dendrogram(Z, color_threshold=240, above_threshold_color='grey')
# Dashed guide line drawn at the same height as the color threshold.
plt.axhline(y=240, c='grey', lw=1, linestyle='dashed')
plt.title('自定义颜色和颜色阈值')
# Panel 3: truncation mode 'lastp' with p=4.
plt.subplot(3, 2, 3)
dendrogram(Z, truncate_mode = 'lastp', p=4 )
plt.title('簇截断-叶子数量')
# Panel 4: truncation mode 'level' with p=2.
plt.subplot(3, 2, 4)
dendrogram(Z, truncate_mode = 'level', p=2 )
plt.title('簇截断-显示层数')
# Panel 5: horizontal layout, orientation "right".
plt.subplot(3, 2, 5)
dendrogram(Z, orientation="right", labels=df.index)
plt.title('水平方向-右')
# Panel 6: horizontal layout, orientation "left".
plt.subplot(3, 2, 6)
dendrogram(Z, orientation="left", labels=df.index)
plt.title('水平方向-左')
fig.tight_layout()  # automatically adjust the spacing between panels
# Restore SciPy's default link color palette so the custom colors set for
# this figure do not leak into dendrograms drawn later in the same session.
# All panels are already drawn, so the figure itself is unaffected.
set_link_color_palette(None)
plt.show()
更多用法可参考树图 Tree[2]
import requests
import pyecharts.options as opts
from pyecharts.charts import Tree
# Download the official ECharts sample hierarchy.
url = "https://echarts.apache.org/examples/data/asset/data/flare.json"
# A timeout keeps the script from hanging forever on an unresponsive server
# (requests applies no timeout by default).
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()  # parse the response body as JSON
# Draw the tree chart with a radial layout.
c = (
    Tree()
    .add(
        series_name="",
        data=[data],
        pos_top="18%",
        pos_bottom="14%",
        layout="radial",
        symbol="emptyCircle",
        symbol_size=7,
    )
    .set_global_opts(
        tooltip_opts=opts.TooltipOpts(trigger="item", trigger_on="mousemove"),
        title_opts=opts.TitleOpts(title="径向树图")
    )
)
# Render the chart inline in the notebook.
c.render_notebook()
import requests
import pyecharts.options as opts
from pyecharts.charts import Tree
# Download the official ECharts sample hierarchy.
url = "https://echarts.apache.org/examples/data/asset/data/flare.json"
# A timeout keeps the script from hanging forever on an unresponsive server
# (requests applies no timeout by default).
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()  # parse the response body as JSON
# Radial tree chart drawn with collapse_interval=2.
c = (
    Tree()
    .add("", [data], collapse_interval=2, layout="radial")
    .set_global_opts(title_opts=opts.TitleOpts(title="发散树图"))
)
# Render the chart inline in the notebook.
c.render_notebook()
6总结
以上利用 scipy 的 dendrogram 并结合 matplotlib 绘制了树状图,也可以通过 pyecharts 的 Tree 快速绘制树状图;还可以通过修改参数或辅以其他绘图知识,自定义各种各样的树状图,以适应相关使用场景。
共勉~
[1]
scipy.cluster.hierarchy.dendrogram: https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.dendrogram.html
[2]
树图 Tree: https://gallery.pyecharts.org/#/Tree/README