前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >山东省第二届数字创新大赛-临沂赛场 医保大数据分析

山东省第二届数字创新大赛-临沂赛场 医保大数据分析

作者头像
三更两点
发布2021-04-02 09:47:12
4070
发布2021-04-02 09:47:12
举报

数据处理以及分析

代码语言:python
复制
# -*- coding:utf-8 -*-
# /usr/bin/python
'''
-------------------------------------------------
   File Name   :  SummaryPlot.py
   Description :  
   Run Script  :  python  SummaryPlot.py
   Envs        :  python == 3.6.6
                  pip install numpy pandas
   Date        :  2021/3/23  上午9:07
   CodeStyle   :  standard, simple, readable, maintainable, and portable!
-------------------------------------------------
   Change Activity:
          2021/3/23 : build
-------------------------------------------------
__Author__ = "Yan Errol 13075851954"
__Email__ = "260187357@qq.com"
__Copyright__ = "Copyright 2021, Yan Errol"
-------------------------------------------------
'''
import warnings
warnings.filterwarnings("ignore")
import time
import numpy as np
import pandas as pd
import json
from properties_util import Properties

class PlotSummary(object):
    '''
    Summary statistics over a claims table loaded from CSV.

    The table is held in ``self.dataDF``. The methods read the columns
    ``CYRQ`` (compared against epoch-second constants), ``ZFY``, ``TCFY``,
    ``RYLB``, ``NL``, ``RYQH`` and ``JZQH``, and write their results under the
    hard-coded ``../model`` and ``../dataSets`` directories.
    '''

    # Epoch-second cut-offs used by calCost to split rows into three buckets.
    # NOTE(review): 1514736000 is 2018-01-01 00:00 at UTC+8, but 1483163999
    # does not appear to fall exactly on a calendar-year boundary -- confirm
    # against the data before relying on the "2016"/"2017" split.
    _END_OF_2016_TS = 1483163999
    _START_OF_2018_TS = 1514736000

    def __init__(self,OutDir,DataDir,dataPath,properties):
        '''
        :param OutDir: output directory (stored; not read by the methods below)
        :param DataDir: data directory (stored; not read by the methods below)
        :param dataPath: path of the CSV file loaded by readData
        :param properties: mapping of configuration strings (keys used here:
            fill_nan, label, ScreeningLabel, timelabel, monthLabel, dist_path)
        '''
        self.OutDir = OutDir
        self.DataDir = DataDir
        self.dataPath = dataPath
        # Every method works on ``dataDF``; the attribute was previously
        # initialised under the misspelled name ``dataDf``.
        self.dataDF = None
        self.properties = properties

    @staticmethod
    def _json_default(value):
        '''Convert NumPy scalars (e.g. the int64 from ``Series.sum``) for ``json``.'''
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(
            "Object of type {} is not JSON serializable".format(type(value).__name__))

    def readData(self):
        '''Load the CSV at ``self.dataPath`` into ``self.dataDF``.'''
        self.dataDF = pd.read_csv(self.dataPath)

    def fill_nan(self,):
        '''
        Fill missing values, then save the unique values of each label column.

        The strategy is ``properties["fill_nan"]``: "0" fills with 0, "mean"
        with the column mean, "ffill" forward-fills and "bfill" backward-fills.
        Any other value leaves the data untouched and prints a notice.

        For every non-empty name in the comma-separated ``properties["label"]``
        the column's unique values are written to ``../model/<name>.npy``.
        '''
        strategy = self.properties["fill_nan"]
        if strategy == "0":
            self.dataDF = self.dataDF.fillna(0)
        elif strategy == "mean":
            # numeric_only=True: text columns have no mean, and recent pandas
            # raises instead of silently skipping them.
            self.dataDF = self.dataDF.fillna(self.dataDF.mean(numeric_only=True))
        elif strategy == "ffill":
            self.dataDF = self.dataDF.ffill()
        elif strategy == "bfill":
            self.dataDF = self.dataDF.bfill()
        else:
            print("Only four method!")

        for label in self.properties["label"].split(","):
            if label != "":
                uni = self.dataDF[label].unique()
                print("uni",uni)
                np.save('../model/{}.npy'.format(label), np.array(uni))

    def calCost(self,ClassDF):
        '''
        Sum ``ZFY`` and ``TCFY`` per period bucket.

        Rows are bucketed on ``CYRQ``: "2016" takes everything up to and
        including the first cut-off, "2018" everything from the second cut-off
        onwards, and "2017" whatever lies strictly between.

        :param ClassDF: DataFrame with CYRQ, ZFY and TCFY columns
        :return: ``{"2016": {"ZFY": ..., "TCFY": ...}, "2017": ..., "2018": ...}``
        '''
        discharge = ClassDF['CYRQ']
        buckets = (
            ('2016', discharge <= self._END_OF_2016_TS),
            ('2017', (discharge > self._END_OF_2016_TS)
                     & (discharge < self._START_OF_2018_TS)),
            ('2018', discharge >= self._START_OF_2018_TS),
        )
        ZFY_TCFY_dict = {}
        for year, mask in buckets:
            rows = ClassDF[mask]
            ZFY_TCFY_dict[year] = {
                'ZFY': rows['ZFY'].sum(),
                'TCFY': rows['TCFY'].sum(),
            }
        return ZFY_TCFY_dict

    def Screening(self):
        '''
        Compute calCost for every class of every screening label.

        Labels come from the comma-separated ``properties['ScreeningLabel']``;
        empty entries are skipped. The classes of a label are read from the
        ``../model/<label>.npy`` file that fill_nan writes. The nested result
        is dumped to ``../dataSets/Screening_Dict.json`` and returned.

        :return: ``{label: {class: calCost result}}``
        '''
        Screening_Dict = {}
        for label in self.properties['ScreeningLabel'].split(","):
            if label == "":
                continue
            # NOTE: allow_pickle=True unpickles arbitrary objects; only load
            # .npy files this pipeline produced itself.
            classes = np.load('../model/{}.npy'.format(label),allow_pickle=True).tolist()
            per_class = {}
            for Class in classes:
                ClassDF = self.dataDF[self.dataDF[label]==Class]
                print(ClassDF,type(ClassDF))
                ZFY_TCFY_dict = self.calCost(ClassDF)
                print("ZFY_TCFY_dict",ZFY_TCFY_dict)
                per_class[Class] = ZFY_TCFY_dict
            Screening_Dict[label] = per_class
        print(Screening_Dict)
        # ensure_ascii=False may emit non-ASCII text, so pin the encoding;
        # ``default`` keeps integer sums (NumPy int64) serialisable.
        with open('../dataSets/Screening_Dict.json', 'w', encoding='utf-8') as f:
            json.dump(Screening_Dict, f,ensure_ascii=False,default=self._json_default)
        return Screening_Dict

    def _monthly_summary(self, ClassDF):
        '''Month-end sums of ZFY/TCFY plus a RYLB row count, indexed by CYRQs.'''
        # An offset object instead of the "M" alias, which recent pandas
        # deprecates; the binning is the same month-end binning.
        by_month = ClassDF.set_index("CYRQs").resample(pd.offsets.MonthEnd())
        summary = by_month[['ZFY', 'TCFY']].sum()
        summary['CountPatients'] = by_month['RYLB'].count()
        return summary

    def monthlabel(self):
        '''
        Build monthly transaction tables for every class of every month label.

        First, each non-empty column named in ``properties["timelabel"]`` is
        converted from epoch seconds to datetimes and stored as ``<name>s``.
        The monthly tables are indexed by ``CYRQs``, so ``CYRQ`` has to be one
        of those columns (or ``CYRQs`` must already exist).

        All per-class tables are stacked under (label, class) index levels and
        written to ``../dataSets/monthTransaction.csv``; previously only the
        table of the last class reached the file.

        :return: the stacked DataFrame (empty, and nothing is written, when
            no month label is configured)
        '''
        for label in self.properties["timelabel"].split(","):
            if label != "":
                self.dataDF[label+"s"] = pd.to_datetime(self.dataDF[label],unit="s")

        print(self.dataDF)
        monthly_tables = {}
        for label in self.properties['monthLabel'].split(","):
            if label == "":
                continue
            # NOTE: allow_pickle=True unpickles arbitrary objects; only load
            # .npy files this pipeline produced itself.
            classes = np.load('../model/{}.npy'.format(label),allow_pickle=True).tolist()
            for Class in classes:
                ClassDF = self.dataDF[self.dataDF[label]==Class]
                monthTransaction = self._monthly_summary(ClassDF)
                print(Class,"-------\n",monthTransaction[['ZFY', 'TCFY']])
                print(monthTransaction)
                monthly_tables[(label, Class)] = monthTransaction

        if not monthly_tables:
            return pd.DataFrame()
        combined = pd.concat(monthly_tables, names=["label", "class"])
        combined.to_csv("../dataSets/monthTransaction.csv")
        return combined

    def CountPatients(self):
        '''
        Count ``RYLB`` entries per day, month and year of ``CYRQs``.

        Requires the ``CYRQs`` datetime column (monthlabel creates it when
        ``CYRQ`` is listed in ``properties["timelabel"]``). Each series is
        printed and written to ``../dataSets/CountPatients<1D|1M|1Y>.csv``.
        '''
        admissions = self.dataDF.set_index("CYRQs")['RYLB']
        periods = (
            ("1D", pd.offsets.Day()),
            ("1M", pd.offsets.MonthEnd()),
            ("1Y", pd.offsets.YearEnd()),
        )
        for suffix, freq in periods:
            counts = admissions.resample(freq).count()
            print(counts)
            counts.to_csv("../dataSets/CountPatients{}.csv".format(suffix))

    def _age_distribution(self):
        '''Bin ``NL`` into decades and aggregate the row count and TCFY sum per bin.'''
        age_bins = [20, 30, 40, 50, 60,70,80]
        age_labels = ['20-30岁', '31-40岁', '41-50岁', '51-60岁','61-70岁',"71-80岁"]
        # include_lowest=True so that age 20 lands in the bin labelled 20-30;
        # values outside 20..80 get no bin and drop out of the aggregation.
        self.dataDF['年龄分层'] = pd.cut(
            self.dataDF.NL, age_bins, labels=age_labels, include_lowest=True)
        # observed=False keeps empty bins in the result on every pandas version.
        grouped = self.dataDF.groupby(by=['年龄分层'], observed=False)
        aggResult = pd.DataFrame(grouped['NL'].count())
        aggResult["TCFY"] = grouped['TCFY'].sum()
        return aggResult

    def AgeDist(self):
        '''
        Age distribution: rows and total ``TCFY`` per ten-year bin of ``NL``.

        Adds the bin as a new categorical column on ``self.dataDF``, prints
        the aggregate and writes it to ``../dataSets/aggResult.csv``.

        :return: DataFrame indexed by bin with columns ``NL`` (row count) and
            ``TCFY`` (sum)
        '''
        aggResult = self._age_distribution()
        print(aggResult,type(aggResult))
        aggResult.to_csv("../dataSets/aggResult.csv")
        return aggResult

    def _flag_medical_migration(self, dist_label):
        '''
        Map ``RYQH`` through ``dist_label`` and flag rows treated elsewhere.

        :param dist_label: mapping ``key -> container``; a ``RYQH`` value found
            in a container is replaced by that key
        '''
        mapped_regions = []
        migration_flags = []
        for region, treated_in in zip(self.dataDF["RYQH"], self.dataDF["JZQH"]):
            # NOTE(review): every entry is tested in turn against the value as
            # already remapped (no early exit), matching the earlier per-cell
            # loop -- confirm whether first-match-wins was intended.
            for key, members in dist_label.items():
                if region in members:
                    region = key
            mapped_regions.append(region)
            migration_flags.append(0 if region in treated_in else 1)
        self.dataDF["RYQH"] = mapped_regions
        # float64 keeps the column dtype the per-cell assignment used to yield.
        self.dataDF["MedicalMigration"] = pd.Series(
            migration_flags, index=self.dataDF.index, dtype="float64")

    def Place(self):
        '''
        Normalise ``RYQH`` and derive the ``MedicalMigration`` flag.

        Writes the untouched table to ``../dataSets/ALLfinal.csv``, loads the
        JSON mapping at ``properties["dist_path"]``, remaps ``RYQH``, sets
        ``MedicalMigration`` to 0 when the remapped ``RYQH`` is contained in
        ``JZQH`` and 1 otherwise, and writes ``../dataSets/data001.csv``.
        '''
        self.dataDF.to_csv('../dataSets/ALLfinal.csv')
        with open(self.properties["dist_path"], 'r', encoding='utf-8') as j:
            dist_label = dict(json.load(j))
        print(dist_label,type(dist_label))

        self._flag_medical_migration(dist_label)

        print(self.dataDF[['RYQH', 'JZQH',"MedicalMigration"]])
        # NOTE(review): the CSV is written with its index column; if this path
        # is also the input path, every run adds one more unnamed column.
        self.dataDF.to_csv('../dataSets/data001.csv')

if __name__ == "__main__":
    # Read the analysis settings from the properties file and echo them.
    read_config = Properties("../config/dataAnsys.properties")
    properties = read_config.get_properties()
    print(properties)

    OutDir, DataDir = "../Result", "../dataSets"
    dataPath = "../dataSets/data001.csv"
    plotsummary = PlotSummary(
        OutDir=OutDir,
        DataDir=DataDir,
        dataPath=dataPath,
        properties=properties,
    )

    # The steps run in this fixed order: the data must be loaded first, and
    # fill_nan writes the ../model/*.npy files that Screening and monthlabel
    # load afterwards.
    pipeline = (
        plotsummary.readData,
        plotsummary.fill_nan,
        plotsummary.Screening,
        plotsummary.monthlabel,
        plotsummary.CountPatients,
    )
    for step in pipeline:
        step()

    aggResult = plotsummary.AgeDist()
    plotsummary.Place()
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2021/04/01 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 数据处理以及分析
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档