公众号:尤而小屋 编辑:Peter 作者:Peter
大家好,我是Peter~
本文基于一份公开的数据讲解plotly的多种图形的绘制,包含:
部分图预览:
Plotly是一个用于创建交互式图表的Python库,它支持多种图表类型,如折线图、散点图、饼图、热力图等。Plotly的特点如下:
总之,Plotly是一个功能强大、易于使用的可视化库,适用于数据分析、科学计算、商业智能等领域。
In 1:
import os
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import date, time, datetime
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.figure_factory as ff
import plotly.express as px
from plotly import tools
from plotly.subplots import make_subplots
from plotly.offline import iplot
import warnings
warnings.filterwarnings('ignore')
In 2:
# Load the student performance data set and preview its first rows.
csv_path = "StudentsPerformance.csv"
df = pd.read_csv(csv_path)
df.head()
In 3:
# Basic scatter plot built with plotly express.
fig = px.scatter(
    data_frame=df,
    x="reading score",
    y="writing score",
)
fig.show()
另一种方式:
In 4:
# The same scatter plot assembled from a graph_objs trace.
points = go.Scatter(
    x=df['reading score'],
    y=df['writing score'],
    mode='markers',
)
scatter = [points]
fig = go.Figure(data=scatter)
fig.show()
对散点进行自定义:
In 5:
# Customised scatter: outlined diamond markers and a centred title.
marker_style = dict(
    size=12,
    color='rgb(0, 189, 255)',
    symbol='diamond',
    opacity=0.75,
    line=dict(color='black', width=1.5),
)
data = [
    go.Scatter(
        x=df['reading score'],
        y=df['writing score'],
        mode='markers',
        marker=marker_style,
    )
]

# Title anchored at the horizontal centre, near the top of the figure.
title_style = dict(
    text='Reading Score & Writing Score',
    y=0.9,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Reading Score'),
    yaxis=dict(title='Writing Score'),
    hovermode='closest',
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
fig.show()
In 6:
# Split the frame by gender once; each trace then reads from its own subset.
male_rows = df[df['gender'] == 'male']
female_rows = df[df['gender'] == 'female']

trace_male = go.Scatter(
    x=male_rows['math score'],
    y=male_rows['writing score'],
    showlegend=True,
    text='Male',   # hover text
    name='Male',   # legend entry
    mode='markers',
    marker=dict(color='cornflowerblue', size=9, opacity=0.55),
)
trace_female = go.Scatter(
    x=female_rows['math score'],
    y=female_rows['writing score'],
    showlegend=True,
    text='Female',
    name='Female',
    mode='markers',
    marker=dict(color='darkorange', size=9, opacity=0.55),
)
data = [trace_male, trace_female]

# Fixed-size canvas with axis titles.
layout = go.Layout(
    title='Math Score & Writing Score',
    xaxis=dict(title='Math Score'),
    yaxis=dict(title='Writing Score'),
    width=900,
    height=600,
    template='simple_white',
)

# Combine traces and layout into the figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
颜色渐变条设置:
In 7:
# Scatter whose marker colour encodes the math score, with a colour bar legend.
math_scores = df['math score']
colour_markers = dict(
    size=14,
    color=math_scores,          # per-point colour value
    showscale=True,
    colorscale='Cividis',
    colorbar=dict(title='Math Score'),
    opacity=0.6,
)
data = [
    go.Scatter(
        x=df['reading score'],
        y=df['writing score'],
        mode='markers',
        text=math_scores,       # hover text shows the math score
        marker=colour_markers,
    )
]

title_style = dict(
    text='Reading Score - Writing Score - Math Score',
    y=0.9,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Reading Score'),
    yaxis=dict(title='Writing Score'),
    template='simple_white',
)

fig = go.Figure(data=data, layout=layout)
fig.show()
基于for循环的散点图:
In 8:
df['parental level of education'].value_counts() # number of rows per parental education level
Out8:
some college 226
associate's degree 222
high school 196
some high school 179
bachelor's degree 118
master's degree 59
Name: parental level of education, dtype: int64
In 9:
# One scatter trace per parental education level (reading vs. math score).
# No marker colour is set on the traces, so colours are left to plotly.
data = []
for level in df['parental level of education'].unique():
    # Filter once per level instead of re-evaluating the mask for x and y.
    level_rows = df[df['parental level of education'] == level]
    data.append(
        go.Scatter(
            x=level_rows['reading score'],
            y=level_rows['math score'],
            mode='markers',
            name=str(level),
            showlegend=True,
            marker=dict(size=12, opacity=0.65),
        )
    )

layout = go.Layout(
    title='Scores by Level of Education',
    xaxis=dict(title='Reading Score'),
    yaxis=dict(title='Math Score'),
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
# Rendered with fig.show() like the other cells (replaces the legacy iplot call).
fig.show()
In 10:
# Bubble chart: marker size is driven by the math score column.
math_scores = df['math score']
bubble_markers = dict(
    size=math_scores * 0.5,     # key line: a data column controls the marker SIZE
    color='#FFAE00',
    showscale=False,
    opacity=0.5,
    line=dict(color='black', width=0.5),
)
data = [
    go.Scatter(
        x=df['reading score'],
        y=df['writing score'],
        mode='markers',
        text=math_scores,       # hover text shows the math score
        marker=bubble_markers,
    )
]

title_style = dict(
    text='Reading Score - Writing Score - Math Score',
    y=0.9,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Reading Score'),
    yaxis=dict(title='Writing Score'),
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
fig.show()
In 11:
# Show the column labels of the loaded frame.
df.columns
Out11:
Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
'test preparation course', 'math score', 'reading score',
'writing score'],
dtype='object')
In 12:
# 3-D scatter of the three scores; marker colour encodes the math score.
data = go.Scatter3d(
    x=df['math score'],
    y=df['reading score'],
    z=df['writing score'],
    mode='markers',
    marker=dict(
        color=df['math score'],
        showscale=True,
        # The colour bar describes the math score, so label it accordingly
        # (it was previously titled 'Weight', which matches no column).
        colorbar=dict(title='Math Score'),
        colorscale='picnic',
        opacity=0.7,
    ),
)

layout = go.Layout(
    title=dict(
        text='Math-Reading-Writing',
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    # 3-D axis titles are configured under `scene`, not xaxis/yaxis.
    scene=dict(
        xaxis=dict(title='math score'),
        yaxis=dict(title='reading score'),
        zaxis=dict(title='writing score'),
    ),
    font=dict(size=12),
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
fig.show()
生成模拟数据
In 13:
# Simulated yearly data: one row per 1 January from 2015 through 2024,
# with random student (25-59) and lecturer (10-19) head counts.
years = range(2015, 2025)
df1 = pd.DataFrame(
    {
        'date': [date(year=yr, month=1, day=1) for yr in years],
        'students': np.random.randint(25, 60, 10),
        'lecturers': np.random.randint(10, 20, 10),
    }
)
df1
Out13:
date | students | lecturers | |
---|---|---|---|
0 | 2015-01-01 | 46 | 13 |
1 | 2016-01-01 | 26 | 17 |
2 | 2017-01-01 | 42 | 19 |
3 | 2018-01-01 | 31 | 10 |
4 | 2019-01-01 | 56 | 15 |
5 | 2020-01-01 | 34 | 13 |
6 | 2021-01-01 | 27 | 14 |
7 | 2022-01-01 | 42 | 18 |
8 | 2023-01-01 | 54 | 11 |
9 | 2024-01-01 | 44 | 19 |
In 14:
# Basic line chart built with plotly express.
fig = px.line(
    data_frame=df1,
    x="date",
    y="students",
)
fig.show()
另一种方法:
In 15:
# The same line chart assembled from a graph_objs trace.
students_line = go.Scatter(
    x=df1['date'],
    y=df1['students'],
    mode='lines',
)
line = [students_line]
fig = go.Figure(data=line)
fig.show()
自定义标题、xy轴名称等:
In 16:
# Line chart with a centred title and named axes.
data = go.Scatter(
    x=df1['date'],
    y=df1['students'],
    mode='lines',
    name='students',
)

title_style = {
    'text': "Number of Students by Years",
    'y': 0.9,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Year'),
    yaxis=dict(title='Student'),
    template='plotly_white',    # template applied to the layout
)

fig = go.Figure(data=data, layout=layout)
fig.show()
同时使用多个数据,更新模板:
In 17:
# Build one line trace per series first.
dates = df1['date']
student = go.Scatter(
    x=dates,
    y=df1['students'],
    mode='lines',
    name='students',
    marker=dict(color='darkorange'),
)
lecturer = go.Scatter(
    x=dates,
    y=df1['lecturers'],
    mode='lines',
    name='lecturers',
    marker=dict(color='royalblue'),
)

# Layout: centred title, x-axis label, dark theme.
title_style = {
    'text': "Number of Students & Lecturers by Years",
    'y': 0.9,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Year'),
    template='plotly_dark',
)

# Multiple traces are passed to the figure as a list.
fig = go.Figure(data=[student, lecturer], layout=layout)
fig.show()
In 18:
# Mean math score per gender; keep only the gender labels for inspection.
math_mean_by_gender = df.groupby('gender').agg({'math score': 'mean'}).reset_index()
x = math_mean_by_gender['gender']
x
Out18:
0 female
1 male
Name: gender, dtype: object
In 19:
# Bar chart of the mean math score per gender.
# Aggregate once and reuse the result for x, y and the bar labels.
math_by_gender = df.groupby('gender').agg({'math score': 'mean'}).reset_index()

data = go.Bar(
    x=math_by_gender['gender'],
    y=math_by_gender['math score'],
    width=[0.5, 0.5],                       # width of each bar
    # Label each bar with the value it actually plots (mean math score);
    # the labels were previously computed from the reading score.
    text=round(math_by_gender['math score'], 2),
    textposition="inside",                  # inside / outside / auto
    textfont=dict(size=26, color="deeppink"),
    # Bar appearance.
    marker=dict(
        color='cornflowerblue',
        opacity=0.7,
        line_color='black',
        line_width=1,
    ),
)

layout = go.Layout(
    title='基于性别的数学平均分',
    xaxis=dict(title='Gender'),
    yaxis=dict(title='Math Score'),
    width=700,
    height=700,
)

fig = go.Figure(data=data, layout=layout)
# Fix the y range to 0-100 so the bars are read against that scale.
fig.update_yaxes(range=[0, 100])
fig.show()
In 20:
# Grouped bars: mean reading / writing / math score per gender.
# Aggregate all three means in a single groupby pass instead of nine.
avg_by_gender = (
    df.groupby('gender')
    .agg({'reading score': 'mean', 'writing score': 'mean', 'math score': 'mean'})
    .reset_index()
)

trace1 = go.Bar(
    x=avg_by_gender['gender'],
    y=avg_by_gender['reading score'],
    text=round(avg_by_gender['reading score'], 2),   # label shown on the bar
    textposition='auto',
    name='Reading Score',
    textfont=dict(size=16),
    marker=dict(color='#06F5E3', opacity=0.65),
)
trace2 = go.Bar(
    x=avg_by_gender['gender'],
    y=avg_by_gender['writing score'],
    text=round(avg_by_gender['writing score'], 2),
    textposition='auto',
    name='Writing Score',
    textfont=dict(size=16),
    marker=dict(color='#FEAD00', opacity=0.65),
)
trace3 = go.Bar(
    x=avg_by_gender['gender'],
    y=avg_by_gender['math score'],
    text=round(avg_by_gender['math score'], 2),
    textposition='auto',
    name='Math Score',
    textfont=dict(size=16),
    marker=dict(color='#CC00FE', opacity=0.65),
)

layout = go.Layout(
    title={
        'text': "Avg Scores by Gender",
        'x': 0.5, 'y': 0.9,
        'xanchor': 'center', 'yanchor': 'top',
    },
    barmode='group',            # bars side by side within each gender
    # Legend placed at the top-left with a fully transparent background/border.
    legend=dict(
        x=0.05,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
    xaxis=dict(title='Gender'),
    yaxis=dict(title='Score'),
    template='plotly_white',
)

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.update_yaxes(range=[0, 100])
# Rendered with fig.show() like the other cells (replaces the legacy iplot call).
fig.show()
In 21:
# Mean of every numeric column per parental education level.
# numeric_only=True is required on pandas >= 2.0, where averaging the
# remaining string columns raises a TypeError instead of dropping them.
df.groupby(['parental level of education']).mean(numeric_only=True)
Out21:
math score | reading score | writing score | |
---|---|---|---|
parental level of education | |||
associate's degree | 67.882883 | 70.927928 | 69.896396 |
bachelor's degree | 69.389831 | 73.000000 | 73.381356 |
high school | 62.137755 | 64.704082 | 62.448980 |
master's degree | 69.745763 | 75.372881 | 75.677966 |
some college | 67.128319 | 69.460177 | 68.840708 |
some high school | 63.497207 | 66.938547 | 64.888268 |
In 22:
# Stacked bars: mean math / reading / writing score per parental education level.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where
# averaging the remaining string columns raises a TypeError.
parental_avg = pd.DataFrame(
    df.groupby(['parental level of education']).mean(numeric_only=True)
)
parental_avg = parental_avg.reset_index()

education_levels = parental_avg['parental level of education']   # one label per bar
trace1 = go.Bar(
    x=education_levels,
    y=parental_avg['math score'],
    name='math score',
    marker=dict(color='#F2E80C', opacity=0.7),
)
trace2 = go.Bar(
    x=education_levels,
    y=parental_avg['reading score'],
    name='reading score',
    marker=dict(color='#44F20C', opacity=0.7),
)
trace3 = go.Bar(
    x=education_levels,
    y=parental_avg['writing score'],
    name='writing score',
    marker=dict(color='#F20CE1', opacity=0.7),
)

layout = go.Layout(
    title='Avg Scores by Level of Education',
    barmode='stack',            # stack the three scores on top of each other
    xaxis=dict(title='Level of Education'),
    yaxis=dict(title='Score'),
    template='plotly_dark',
)

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()
基础箱型图:
In 23:
# Single box plot of the math score distribution.
data = go.Box(
    y=df['math score'],         # values to summarise
    name='Math Score',
    marker_color='#91E26B',
)

title_style = {
    'text': "Math Score",
    'y': 0.9,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
layout = go.Layout(title=title_style, width=600, height=600)

fig = go.Figure(data=data, layout=layout)
fig.show()
同时绘制子图和箱型图:
In 24:
# 1x2 grid of box plots: male scores on the left, female scores on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,                      # both panels share one y axis
    subplot_titles=("Male", "Female"),
)

# (column, trace name, colour) for each box, in drawing order.
box_specs = [
    ('writing score', 'Writing Score', '#1760E1'),
    ('math score', 'Math Score', '#17E160'),
    ('reading score', 'Reading Score', '#E1E117'),
]

# Subplot column 1 holds the male boxes, column 2 the female boxes.
for panel, gender in enumerate(('male', 'female'), start=1):
    gender_rows = df[df['gender'] == gender]
    for column, label, colour in box_specs:
        fig.add_trace(
            go.Box(
                y=gender_rows[column],
                showlegend=False,
                name=label,
                marker_color=colour,
            ),
            row=1,
            col=panel,
        )

fig.update_layout(
    title={
        'text': "Scores by Gender",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    width=800,
    height=450,
    template='plotly',
)
fig.show()
In 25:
# Pie chart of how many students fall into each race/ethnicity group.
# NOTE(review): `colors` is not applied to this chart and holds four entries;
# extend it to one colour per category before passing it as marker colours.
colors = ['#28F20C', '#0CF2F2', '#F27F0C', '#F20C52']

# Count once and reuse the result for both labels and values.
race_counts = df['race/ethnicity'].value_counts()
fig = go.Figure(
    data=[
        go.Pie(
            labels=race_counts.keys(),      # category names
            values=race_counts.values,      # rows per category
        )
    ]
)
fig.update_traces(
    hoverinfo='value',          # hover shows the count
    textinfo='label',           # each slice shows its label
    textfont_size=16,
    textposition='auto',
    showlegend=False,
)
fig.update_layout(
    title={
        'text': "Race/Ethnicity Groups",    # typo "Gropus" corrected
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template='simple_white',
)
fig.show()
可以绘制扇形区域图:
In 26:
# Pie chart of lunch types with the second slice pulled out of the pie.
colors = ['#14CFE8', '#E814C1']

lunch_counts = df['lunch'].value_counts()
lunch_pie = go.Pie(
    labels=lunch_counts.keys(),
    values=lunch_counts.values,
    pull=[0, 0.25],             # per-slice offset from the centre
)
fig = go.Figure(data=[lunch_pie])

slice_style = dict(
    colors=colors,
    line=dict(color='black', width=1.5),
)
fig.update_traces(
    hoverinfo='label',
    textinfo='percent',
    textfont_size=20,
    textposition='auto',
    marker=slice_style,
)
fig.update_layout(
    title={
        'text': "Percentages of Lunch Types",
        'x': 0.5,
        'y': 0.9,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template='plotly_white',
)
fig.show()
主要是通过hole属性来控制:
In 27:
# Donut chart of parental education levels; `hole` sets the inner radius.
colors = ['#D7DD19', '#6FDD19', '#19DDA5', '#195ADD', '#A219DD', '#DD1984']

education_counts = df['parental level of education'].value_counts()
education_pie = go.Pie(
    labels=education_counts.keys(),
    values=education_counts.values,
)
fig = go.Figure(data=[education_pie])

slice_style = dict(
    colors=colors,
    line=dict(color='white', width=2),
)
fig.update_traces(
    hoverinfo='label',
    textinfo='value',
    hole=0.4,                   # size of the inner ring
    textfont_size=22,
    textposition='auto',
    marker=slice_style,
)
fig.update_layout(
    title={
        'text': "Parental Level of Education",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template='simple_white',
)
fig.show()
In 28:
# Histogram of math scores in 5-point bins.
# NOTE(review): with end=100 a score of exactly 100 may fall outside the last
# bin — confirm against the plotly xbins documentation.
data = [
    go.Histogram(
        x=df['math score'],                         # values to bin
        xbins=dict(start=0, end=100, size=5),       # bin start, end and width
        marker=dict(
            color='#FFE400',                        # bar fill colour
            line=dict(color='black', width=2),      # bar outline
        ),
    )
]

layout = go.Layout(
    title='直方图绘制',
    xaxis=dict(title='Score'),
    yaxis=dict(title='Frequency'),
    width=700,
    height=450,
    template='simple_white',
)

fig = go.Figure(data=data, layout=layout)
# Rendered with fig.show() like the other cells (replaces the legacy iplot call).
fig.show()
分组的直方图:
In 29:
# Overlaid reading-score histograms, one per gender.
fig = go.Figure()

# (gender value, legend name, bar colour) in drawing order.
gender_specs = [
    ('male', 'Male', '#0891EF'),
    ('female', 'Female', '#FF00E0'),
]
for gender, label, colour in gender_specs:
    reading_scores = df[df['gender'] == gender]['reading score']
    fig.add_trace(
        go.Histogram(
            x=reading_scores,
            xbins=dict(start=0, end=100, size=5),   # bin start, end and width
            name=label,
            marker=dict(color=colour, opacity=0.5),
        )
    )

fig.update_layout(
    title='Reading Scores Histogram',
    barmode='overlay',          # draw the two histograms on top of each other
    xaxis=dict(title='Score'),
    yaxis=dict(title='Frequency'),
    width=700,
    height=450,
)
fig.show()
distplots图是一种用于展示数值数据的统计表示的图形,它结合了直方图、核密度估计或正态曲线以及地毯图。distplots图提供了一种灵活的方式来观察和分析单变量观测值的分布特征。
In 30:
# Distplot of math scores, one distribution per race/ethnicity group.
# Compute the distinct groups once and iterate over them directly, instead of
# indexing into a freshly recomputed unique() array on every iteration.
group_labels = list(df['race/ethnicity'].unique())
hist_data = [
    df[df['race/ethnicity'] == group]['math score']    # math scores of this group
    for group in group_labels
]

# Build the distplot figure with 5-point histogram bins.
fig = ff.create_distplot(hist_data, group_labels, bin_size=5)
fig.update_layout(
    title={
        'text': "Math Scores Distplot",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    barmode='overlay',
    template='plotly_white',
)
fig.show()
In 31:
# Heatmap of the MEAN math score for each (gender, test preparation) pair.
# Passing the raw per-student columns gives many rows per (x, y) cell, so each
# cell would show a single row's score rather than a summary; aggregate first.
math_grid = df.pivot_table(
    index='test preparation course',    # heatmap rows (y)
    columns='gender',                   # heatmap columns (x)
    values='math score',
    aggfunc='mean',
)
fig = go.Figure(
    go.Heatmap(
        x=math_grid.columns,
        y=math_grid.index,
        z=math_grid.values,
    )
)
fig.show()
绘制进阶版的热力图:
In 32:
# Heatmap of the MEAN math score for each (gender, parental education) pair.
# Passing the raw per-student columns gives many rows per (x, y) cell, so each
# cell would show a single row's score rather than a summary; aggregate first.
math_grid = df.pivot_table(
    index='parental level of education',    # heatmap rows (y)
    columns='gender',                       # heatmap columns (x)
    values='math score',
    aggfunc='mean',
)
data = [
    go.Heatmap(
        x=math_grid.columns,
        y=math_grid.index,
        z=math_grid.values,     # values mapped onto the colour scale
        colorscale='Magma',
    )
]

layout = go.Layout(
    title={
        'text': "Gender & Level of Education",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis=dict(title='Gender'),
    yaxis=dict(title='Level of Education'),
    width=600,
    height=450,
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
# Rendered with fig.show() like the other cells (replaces the legacy iplot call).
fig.show()
In 33:
# 1x2 grid of line charts sharing the y axis, one panel per five-year span.
fig = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    subplot_titles=("2015-2019", "2020-2024"),
)

# Left panel: first five rows of df1.
first_half = go.Scatter(
    x=df1['date'][0:5],
    y=df1['students'][0:5],
    mode='lines',
    showlegend=False,
    name='students15-19',
    line=dict(color='#18FF01', width=3, dash='dashdot'),
)
fig.add_trace(first_half, row=1, col=1)

# Right panel: rows five through nine of df1.
second_half = go.Scatter(
    x=df1['date'][5:10],
    y=df1['students'][5:10],
    mode='lines',
    showlegend=False,
    name='students20-24',
    line=dict(color='#01AAFF', width=3, dash='dash'),
)
fig.add_trace(second_half, row=1, col=2)

# Give the y axis of each panel the same title.
for panel in (1, 2):
    fig.update_yaxes(title_text="Students", row=1, col=panel)

# Centred figure title and dark theme.
fig.update_layout(
    title=dict(
        text='Number of Students by Years',
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    template='plotly_dark',
)
fig.show()
绘制多个箱型图:
In 34:
# Three box plots in one figure; the values are passed as x, not y.
box_specs = [
    ('reading score', 'Reading Score'),
    ('writing score', 'Writing Score'),
    ('math score', 'Math Score'),
]
data = [
    go.Box(x=df[column], showlegend=True, name=label)
    for column, label in box_specs
]

title_style = {
    'text': "Scores",
    'y': 0.9,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
layout = go.Layout(title=title_style, width=700, height=450)

fig = go.Figure(data=data, layout=layout)
fig.show()
In 35:
# Side-by-side bar chart and pie chart of the race/ethnicity group sizes.
colors = ['#4BA7CF', '#CF5B4B', '#B764D6', '#E3885B', '#5BE3E1']

# Left cell is a cartesian ("xy") subplot, right cell a "domain" one for the pie.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Countplot', 'Percentages'),
    specs=[[{"type": "xy"}, {'type': 'domain'}]],
)

race_counts = df['race/ethnicity'].value_counts()
count_values = race_counts.values.tolist()
label_font = dict(size=18, color='white')

# Subplot 1: bar chart of the counts, labelled with the count itself.
count_bars = go.Bar(
    y=count_values,
    x=race_counts.index,
    text=count_values,
    textfont=label_font,
    name='race/ethnicity',
    textposition='auto',
    showlegend=False,
    marker=dict(color=colors),
)
fig.add_trace(count_bars, row=1, col=1)

# Subplot 2: pie chart of the same counts.
share_pie = go.Pie(
    labels=race_counts.keys(),
    values=race_counts.values,
    textfont=label_font,
    textposition='auto',
    showlegend=False,
    name='race/ethnicity',
    marker=dict(colors=colors),
)
fig.add_trace(share_pie, row=1, col=2)

fig.update_layout(
    title={
        'text': 'Race/Ethnicity',
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template='plotly_dark',
)
fig.show()
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。