# -*- coding:utf-8 -*-
# /usr/bin/python
'''
-------------------------------------------------
File Name : SummaryPlot.py
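Description : Summarise a medical-cost CSV: fill missing values, aggregate ZFY/TCFY per class and period, count patients per day/month/year, bin ages, and derive a MedicalMigration flag.
Run Script : python SummaryPlot.py
Envs : python == 3.6.6
pip install numpy pandas
Date : 2021/3/23 9:07 AM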
CodeStyle : standard, simple, readable, maintainable, and portable!
-------------------------------------------------
Change Activity:
2021/3/23 : build
-------------------------------------------------
__Author__ = "Yan Errol 13075851954"
__Email__ = "260187357@qq.com"
__Copyright__ = "Copyright 2021, Yan Errol"
-------------------------------------------------
'''
import warnings
warnings.filterwarnings("ignore")
import time
import numpy as np
import pandas as pd
import json
from properties_util import Properties
class PlotSummary(object):
    '''Summary statistics over a CSV data set loaded into a pandas DataFrame.

    The methods read their settings from ``properties`` (a mapping of string
    keys to comma-separated string values) and write their results to
    hard-coded paths under ``../dataSets`` and ``../model``.
    '''

    def __init__(self, OutDir, DataDir, dataPath, properties):
        '''Store the paths and settings; no I/O happens here.

        :param OutDir: output directory (stored, not used by the methods below)
        :param DataDir: data directory (stored, not used by the methods below)
        :param dataPath: path of the CSV file loaded by ``readData``
        :param properties: mapping with the keys read by the other methods
            ("fill_nan", "label", "ScreeningLabel", "timelabel",
            "monthLabel", "dist_path")
        '''
        self.OutDir = OutDir
        self.DataDir = DataDir
        self.dataPath = dataPath
        # Same spelling as every other method uses (was "dataDf" before).
        self.dataDF = None
        self.properties = properties

    @staticmethod
    def _to_builtin(value):
        '''Convert a numpy scalar to the matching Python scalar, if it is one.'''
        return value.item() if hasattr(value, "item") else value

    @staticmethod
    def _load_classes(label):
        '''Return the unique values that ``fill_nan`` saved for ``label``.'''
        # allow_pickle is needed for object (string) columns; the file is the
        # one written by fill_nan, do not point this at untrusted data.
        return np.load('../model/{}.npy'.format(label), allow_pickle=True).tolist()

    def readData(self):
        '''Load the CSV at ``self.dataPath`` into ``self.dataDF``.'''
        self.dataDF = pd.read_csv(self.dataPath)

    def fill_nan(self,):
        '''Fill missing values and save the unique values of the label columns.

        ``properties["fill_nan"]`` selects the strategy: "0" fills with the
        constant 0, "mean" with the column mean (numeric columns only),
        "ffill" forward-fills and "bfill" backward-fills. Any other value
        leaves the data untouched and prints a message.

        For every non-empty name in ``properties["label"]`` the unique values
        of that column are saved to ``../model/<name>.npy``.
        '''
        strategy = self.properties["fill_nan"]
        if strategy == "0":
            self.dataDF = self.dataDF.fillna(0)
        elif strategy == "mean":
            # numeric_only avoids a TypeError on text columns in pandas >= 2.
            self.dataDF = self.dataDF.fillna(self.dataDF.mean(numeric_only=True))
        elif strategy == "ffill":
            # fillna(method=...) is deprecated; ffill()/bfill() are equivalent.
            self.dataDF = self.dataDF.ffill()
        elif strategy == "bfill":
            self.dataDF = self.dataDF.bfill()
        else:
            print("Only four method!")

        for label in self.properties["label"].split(","):
            if label != "":
                uni = self.dataDF[label].unique()
                print("uni", uni)
                arrayunique = np.array(uni)
                np.save('../model/{}.npy'.format(label), arrayunique)

    def calCost(self, ClassDF):
        '''Sum the ZFY and TCFY columns of ``ClassDF`` per CYRQ period.

        :param ClassDF: DataFrame with the columns "CYRQ", "ZFY" and "TCFY"
        :return: ``{"2016": {"ZFY": ..., "TCFY": ...}, "2017": ..., "2018": ...}``
            with plain Python numbers, so the result is JSON serialisable
        '''
        cyrq = ClassDF['CYRQ']
        # "2016" takes everything up to the first threshold and "2018"
        # everything from the second one on, including other years.
        # NOTE(review): 1514736000 is 2017-12-31 16:00:00 UTC, but 1483163999
        # is 2016-12-31 05:59:59 UTC - confirm the intended 2016/2017 boundary.
        periods = (
            ('2016', cyrq <= 1483163999),
            ('2017', (cyrq > 1483163999) & (cyrq < 1514736000)),
            ('2018', cyrq >= 1514736000),
        )
        ZFY_TCFY_dict = {}
        for year, mask in periods:
            subset = ClassDF[mask]
            ZFY_TCFY_dict[year] = {
                column: self._to_builtin(subset[column].sum())
                for column in ('ZFY', 'TCFY')
            }
        return ZFY_TCFY_dict

    def Screening(self):
        '''Compute ``calCost`` for every class of every screening label.

        The labels come from ``properties["ScreeningLabel"]``; empty names
        (for example from a trailing comma) are skipped. The nested result
        ``{label: {class: calCost(...)}}`` is written to
        ``../dataSets/Screening_Dict.json`` and returned.
        '''
        Screening_Dict = {}
        for label in self.properties['ScreeningLabel'].split(","):
            if label == "":
                continue
            per_class = {}
            for Class in self._load_classes(label):
                ClassDF = self.dataDF[self.dataDF[label] == Class]
                print(ClassDF, type(ClassDF))
                ZFY_TCFY_dict = self.calCost(ClassDF)
                print("ZFY_TCFY_dict", ZFY_TCFY_dict)
                per_class[Class] = ZFY_TCFY_dict
            Screening_Dict[label] = per_class
        print(Screening_Dict)
        # ensure_ascii=False emits raw non-ASCII text, so fix the encoding.
        with open('../dataSets/Screening_Dict.json', 'w', encoding='utf-8') as f:
            json.dump(Screening_Dict, f, ensure_ascii=False)
        return Screening_Dict

    def monthlabel(self):
        '''Write monthly ZFY/TCFY sums and row counts per class.

        Every non-empty column named in ``properties["timelabel"]`` is parsed
        as epoch seconds into a new ``<name>s`` datetime column. Then, for
        each class of each label in ``properties["monthLabel"]``, the rows are
        grouped by month of "CYRQs" and written to
        ``../dataSets/monthTransaction.csv``.
        '''
        for label in self.properties["timelabel"].split(","):
            if label != "":
                self.dataDF[label + "s"] = pd.to_datetime(self.dataDF[label], unit="s")
        print(self.dataDF)

        for label in self.properties['monthLabel'].split(","):
            if label == "":
                continue
            for Class in self._load_classes(label):
                by_date = self.dataDF[self.dataDF[label] == Class].set_index("CYRQs")
                # Resample the indexed frame once; resampling a Resampler
                # again is not supported. The 'M' alias is kept for the pandas
                # versions matching the file header (newer pandas: 'ME').
                monthTransaction = by_date[['ZFY', 'TCFY']].resample('M').sum()
                print(Class, "-------\n", monthTransaction)
                monthTransaction['CountPatients'] = by_date['RYLB'].resample('M').count()
                print(monthTransaction)
                # NOTE(review): every class writes to the same path, so only
                # the last class's table remains on disk - confirm intent.
                monthTransaction.to_csv("../dataSets/monthTransaction.csv")

    def CountPatients(self):
        '''Count non-null "RYLB" rows per day, month and year of "CYRQs".

        Requires the "CYRQs" column created by ``monthlabel``. Each series is
        printed and written to ``../dataSets/CountPatients<rule>.csv``.
        '''
        visits = self.dataDF.set_index("CYRQs")['RYLB']
        outputs = (
            ("1D", "../dataSets/CountPatients1D.csv"),
            ("1M", "../dataSets/CountPatients1M.csv"),
            ("1Y", "../dataSets/CountPatients1Y.csv"),
        )
        for rule, out_path in outputs:
            # One resample per frequency; the series is never resampled twice.
            counts = visits.resample(rule).count()
            print(counts)
            counts.to_csv(out_path)

    def AgeDist(self):
        '''Bin the "NL" column into decades and aggregate per bin.

        Adds the categorical column '年龄分层' to ``self.dataDF``, writes the
        per-bin row count of "NL" and sum of "TCFY" to
        ``../dataSets/aggResult.csv`` and returns that DataFrame.
        '''
        age_bins = [20, 30, 40, 50, 60, 70, 80]
        age_labels = ['20-30岁', '31-40岁', '41-50岁', '51-60岁', '61-70岁', "71-80岁"]
        # include_lowest puts the value 20 into the first bin, as its label says.
        self.dataDF['年龄分层'] = pd.cut(
            self.dataDF.NL, age_bins, labels=age_labels, include_lowest=True
        )
        # observed=False keeps empty bins in the result on every pandas version.
        grouped = self.dataDF.groupby(by=['年龄分层'], observed=False)
        aggResult = pd.DataFrame(grouped['NL'].count())
        aggResult["TCFY"] = grouped['TCFY'].sum()
        print(aggResult, type(aggResult))
        aggResult.to_csv("../dataSets/aggResult.csv")
        return aggResult

    def Place(self):
        '''Map "RYQH" through the district table and flag mismatches with "JZQH".

        The table is the JSON object at ``properties["dist_path"]``. A row's
        "RYQH" is replaced by a key whenever the current value is contained in
        that key's value; keys are tried in file order against the possibly
        already replaced value. "MedicalMigration" is 0 when the resulting
        "RYQH" is contained in the text of "JZQH", otherwise 1.

        Writes ``../dataSets/ALLfinal.csv`` before and
        ``../dataSets/data001.csv`` after the mapping.
        '''
        self.dataDF.to_csv('../dataSets/ALLfinal.csv')
        with open(self.properties["dist_path"], 'r', encoding='utf-8') as j:
            dist_label = dict(json.load(j))
        print(dist_label, type(dist_label))

        # Work on plain column values instead of one .loc access per cell.
        regions = []
        migration = []
        for ryqh, jzqh in zip(self.dataDF["RYQH"], self.dataDF["JZQH"]):
            for key, value in dist_label.items():
                if ryqh in value:
                    ryqh = key
            regions.append(ryqh)
            # str() keeps the substring test for text values and avoids a
            # TypeError when a cell is numeric (for example after fillna(0)).
            migration.append(0 if str(ryqh) in str(jzqh) else 1)
        self.dataDF["RYQH"] = regions
        # Stored as float, which is what the previous cell-by-cell assignment
        # into a new column normally produced.
        self.dataDF["MedicalMigration"] = pd.Series(
            migration, index=self.dataDF.index, dtype="float64"
        )

        print(self.dataDF[['RYQH', 'JZQH', "MedicalMigration"]])
        # NOTE(review): this is the same path the __main__ block passes as
        # dataPath, so the input file is overwritten (with the index added as
        # an extra column) - confirm this is intended.
        self.dataDF.to_csv('../dataSets/data001.csv')
if __name__ == "__main__":
    # Load the analysis settings and echo them for the run log.
    settings = Properties("../config/dataAnsys.properties").get_properties()
    print(settings)

    summary = PlotSummary(
        OutDir="../Result",
        DataDir="../dataSets",
        dataPath="../dataSets/data001.csv",
        properties=settings,
    )

    # The steps run in this fixed order: load, clean, then each report.
    summary.readData()
    summary.fill_nan()
    summary.Screening()
    summary.monthlabel()
    summary.CountPatients()
    summary.AgeDist()
    summary.Place()