社区首页 >专栏 >pandas+matplotlib 计算并绘制柏拉图（排列图）

pandas+matplotlib 计算并绘制柏拉图（排列图）

用户6021899

发布于 2020-12-14 03:43:23

91200

代码可运行

文章被收录于专栏：Python编程 pyqt matplotlib

运行总次数：0

代码可运行

某型号电子产品有两三百个测试参数，下图是一批该产品的测试数据，每一行代表一个unit，每一列代表一个测试参数。

下面是利用pandas，依据文本文件中自定义的参数优先级，计算各个参数坏品的百分比：

import os
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
# Global matplotlib settings, applied one key at a time:
#   - prefer the SimHei font for the sans-serif family (presumably so the
#     Chinese labels render -- requires SimHei to be installed),
#   - draw minus signs with the ASCII hyphen instead of the Unicode minus,
#   - use the Computer Modern ("cm") font set for mathtext.
matplotlib.rcParams["font.sans-serif"] = "SimHei"
matplotlib.rcParams["axes.unicode_minus"] = False
matplotlib.rcParams['mathtext.fontset'] = "cm"

# Location of the test-data CSV. The lot ID is taken as the part of the
# file name that precedes the first underscore.
csvPath = r"H:\test.csv"
Lot = os.path.basename(csvPath).split("_")[0]
print("Lot: %s" % Lot)

# The first physical line of the file is skipped, so the second line
# becomes the header row.
df = pd.read_csv(csvPath, skiprows=[0])
print("初始的总行数(含两行specs)：", df.shape[0])

# The first two rows hold the spec limits (row 0 and row 1 are later read
# as the lower and upper limit of each parameter); everything after them
# is per-unit test data. Positional slicing is used on purpose: a label
# slice such as .loc[0:2] is end-inclusive and would also pull the first
# data row into the spec frame.
df_specs = df.iloc[:2].copy()
df_data = df.iloc[2:].copy()  # de-duplication must not see the spec rows
del df

# Units may have been retested: keep only the last record per Part ID.
df_data = df_data.drop_duplicates(subset=["Part ID"], keep="last")
print("去重后的测试数据行数：", df_data.shape[0])

df_fail = df_data[df_data["Pass/Fail"] == "Fail"]
print("去重后的坏品数：", df_fail.shape[0])

# Fail early with a clear message instead of a bare ZeroDivisionError
# when the file contains no unit rows at all.
if df_data.empty:
    raise ValueError("no test data rows found in %s" % csvPath)

# Yield = fraction of de-duplicated units that are not marked "Fail".
yield_ = 1 - len(df_fail) / len(df_data)
print("yield:", "%.2f%%" % (yield_ * 100))
#df_pass = df[df["Pass/Fail"] =="Pass"]

# Read the user-defined parameter priority list: one column name per line,
# in priority order. Empty lines and lines starting with "#" are ignored.
with open("spec priority.txt","rt") as priority:
    cols_withSpecs = []
    for raw_line in priority:
        if raw_line == "\n" or raw_line.startswith("#"):
            continue
        cols_withSpecs.append(raw_line.rstrip("\n"))  # drop the trailing newline

# Restrict the failing units to the columns that have a spec.
fail = df_fail.loc[:, cols_withSpecs]
pareto_dict = {}
NG_qty = len(fail)
NG_qty_ = NG_qty  # remember the initial fail count; NG_qty shrinks below
print("坏品总数", NG_qty)
su = 0  # running total of units attributed to some parameter

# Walk the parameters in priority order. At each step keep only the units
# that are still within spec for that parameter; the units that drop out
# are attributed to it, so every unit is charged to the first (highest
# priority) parameter it violates.
for param in cols_withSpecs:
    # Limits are inclusive. An earlier attempt to treat on-the-limit values
    # as failures (strict > / <) was wrong: at least for DUT_READ2_VALUE a
    # value of 0 is allowed to pass.
    not_below_lower = fail[param] >= df_specs[param][0]
    not_above_upper = fail[param] <= df_specs[param][1]
    fail = fail[not_below_lower & not_above_upper]

    remaining = len(fail)
    dropped = NG_qty - remaining
    pareto_dict[param] = dropped
    su += dropped
    NG_qty = remaining
print("共%d项有spec的参数"%len(cols_withSpecs))

# Keep only parameters that caught at least one unit, largest count first
# (ties keep their priority order because the sort is stable).
pareto = sorted(
    ([name, count] for name, count in pareto_dict.items() if count > 0),
    key=lambda entry: entry[1],
    reverse=True,
)

# Put the passing units in front as the first category.
pareto = [["Pass", len(df_data) - NG_qty_]] + pareto
def to_percent(temp, position):
    """Format the fraction *temp* as a percentage string with two decimals.

    The signature matches what ``matplotlib.ticker.FuncFormatter`` passes
    to its callback (tick value, tick position); *position* is not used.
    """
    return "%.2f%%" % (100 * temp)

# Cumulative fraction of all de-duplicated units covered after each Pareto
# entry, in the same order as `pareto` (used for the cumulative curve).
acc = []
sum_ = 0
for s, qty in pareto:
    sum_ += qty
    # The closing parenthesis of append() was missing, which made the
    # whole script fail to parse.
    acc.append(float(sum_) / len(df_data))

再根据计算得到的数据绘制Pareto 图：

# Draw the Pareto chart: bars (left axis) show each category's share of all
# tested units, the dashed line (right axis) shows the cumulative share.
with plt.style.context('bmh'):
    fig, ax = plt.subplots(1, 1)
    xlabels = [s for s, qty in pareto]
    y = [float(qty) / len(df_data) for s, qty in pareto]
    ax.bar(xlabels, y)

    # Data label slightly above the top of every bar. The text is added to
    # `ax` explicitly instead of relying on pyplot's "current axes" state.
    for label, height in zip(xlabels, y):
        ax.text(label, height + 0.0002, '%.2f%%' % (height * 100),
                ha='center', va='bottom', fontsize=10, color="red")

    ax.set_title("xxxxx Yield bridge" + "\n(yield:%.2f%%)" % (100 * yield_), fontsize=18)
    for ticklabel in ax.xaxis.get_ticklabels():
        ticklabel.set_rotation(+90)  # 0 degrees means horizontal, left to right
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(to_percent))
    ax.set_ylabel("ratio[%] out of input")
    # Fixed 0-10% window on the bar axis; any bar taller than that
    # (presumably the "Pass" bar) is clipped at the top of the plot.
    ax.set_ylim(0, 0.1)

    # Secondary y axis sharing the same x axis for the cumulative curve.
    ax2 = ax.twinx()
    ax2.plot(xlabels, acc, 'go--', linewidth=2, markersize=5)
    ax2.yaxis.set_major_formatter(ticker.FuncFormatter(to_percent))
    ax2.set_ylabel("acc.[%]")
    ax2.set_ylim(0.8, 1.0)

# plt.show() runs the GUI event loop and blocks until the window is closed.
# Figure.show() does not manage an event loop, so when run as a plain
# script the window could disappear immediately or never be drawn.
plt.show()