假如遇到下面场景,不同项目使用不同的Excel文件跟踪进度,每周在该Excel新增一个sheet表。同时有多个这样的项目Excel文件。需要将所有项目文件中的信息提取合并到一个Excel文件中方便统计。
合并后的Excel
为方便使用,将此功能升级开发为一个Web页面
之所以可以提取各Excel sheet中的数据,前提示它们满足一定的模板格式,为方便找到格式错误,同时提供了模板格式检查的功能。
下面看一下后台代码实现:
excel_tools.py用作Excel指定信息提取以及合并
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import os
import logging
import warnings
warnings.filterwarnings('ignore')
handler=logging.FileHandler('log.txt',mode='w')
logger=logging.getLogger(__name__)
logger.addHandler(handler)
def parse_week(filePath,sheet):
_,fileNameExt=os.path.split(filePath)
fileName,_=os.path.splitext(fileNameExt)
df=pd.read_excel(filePath,sheet_name=sheet)
row1=df.iloc[0,1:]
name=row1[row1.notna()].values[0]
columns1=df.iloc[:,0].str.strip()
df.set_index(columns1,inplace=True)
df.drop(columns=[df.columns[0]],inplace=True)
dfTable=df.loc['本周任务':'下周计划']
dfTable.reset_index(inplace=True)
dfHead=dfTable.iloc[1]
dfBody=dfTable.iloc[2:-1]
dfBody.columns=dfHead
dfBody.rename(columns=lambda x:str(x).strip(),inplace=True)
#dfBody=dfBody.iloc[:,[5,-2]]
dfBody=dfBody[['责任人','实际工作量(h)']]
dfBody.dropna(inplace=True)
dfBody=dfBody.assign(项目名称=name,文件来源=fileName,sheet=sheet)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG,f'{name}:{len(dfBody)}个人')
return dfBody
def combine_week(dirPath,mode,customs):
files=os.listdir(dirPath)
sheets=[]
sheetDict={}
dfs=[]
for file in files:
filePath=os.path.join(dirPath,file)
excel=pd.ExcelFile(filePath)
sheetDict[file]=excel.sheet_names
if mode=='first':
sheetName=excel.sheet_names[0]
sheets.append((file,sheetName))
if mode=='last':
sheetName=excel.sheet_names[-1]
sheets.append((file,sheetName))
if mode=='preview':
pass
if mode=='custom':
customs=[(file,sheetName) for file,sheetName in customs if file in files]
sheets=customs
errorDict={}
print('sheets',sheets)
for file,sheetName in sheets:
errorDict.setdefault(file,{})
try:
filePath=os.path.join(dirPath,file)
df0=parse_week(filePath,sheetName)
except:
print('error',file,sheetName)
errorDict[file][sheetName]=True
df0=None
print(df0)
dfs.append(df0)
if dfs and not list(filter(lambda x:isinstance(x,None.__class__),dfs)):
df=pd.concat(dfs)
else:
df=None
print('sheetDict',sheetDict)
return df,sheetDict,errorDict
if __name__=='__main__':
pass
server.py使用tornado Web框架开发后台
# -*- coding:utf-8 -*-
import tornado.web
import tornado.httpserver
from tornado.options import options,define
import tornado.ioloop
import os
import pandas as pd
import json
from excel_tools import *
define('port',default=8000,type=int)
class BaseHandler(tornado.web.RequestHandler):
def get_current_user(self):
user=self.get_secure_cookie('user',None)
return user
class listHandler(BaseHandler):
@tornado.web.authenticated
def get(self):
print('list')
self.render('list.html',username=self.current_user)
def post(self):
username=self.get_argument('username')
data=Excel.filter(owner=username)
data=list(data.dicts())
print(username)
self.write({'result':data})
class combineHandler(tornado.web.RequestHandler):
def get(self):
self.render('combine.html',df=None,result=None,errorDict=None,sheetDict=None)
def post(self):
df=None
mode=self.get_argument('mode','first')
sheetDict=None
errorDict=None
customs=None
sheets=self.get_argument('sheets',None)
print('sheets',sheets)
if sheets:
sheets=sheets.split(',')
customs=[sheet.split('<>') for sheet in sheets]
print(customs)
files=self.request.files['file']
dirPath='./static/combine/'+str(uuid.uuid1())
os.mkdir(dirPath)
for file in files:
filename=file['filename']
with open(dirPath+'/'+filename,'wb') as f:
f.write(file.body)
df,sheetDict,errorDict=combine_week(dirPath,mode=mode,customs=customs)
print(sheetDict)
self.render('combine.html',df=df,errorDict=errorDict,sheetDict=sheetDict)
class App(tornado.web.Application):
def __init__(self,*args,**kwargs):
tornado.web.Application.__init__(self,*args,**kwargs)
if __name__=='__main__':
handlers=[('/combine',combineHandler),
]
template_path=os.path.join('.','template')
static_path=os.path.join('.','static')
app=App(handlers=handlers,template_path=template_path,
static_path=static_path,login_url='/login',
debug=True,cookie_secret='miao')
options.parse_command_line()
server=tornado.httpserver.HTTPServer(app)
server.listen(options.port)
tornado.ioloop.IOLoop().current().start()
前端页面combine.html代码如下:
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta HTTP-EQUIV="pragma" CONTENT="no-cache">
<!-- 上述3个meta标签*必须*放在最前面,任何其他内容都*必须*跟随其后! -->
<title>欢迎使用</title>
<!-- Bootstrap -->
<link rel="stylesheet" href="/static/bootstrap/css/bootstrap.min.css">
<script src="/static/jquery/jquery.min.js"></script>
<script src="/static/underscore/underscore.js"></script>
<script src="/static/leg-tree.js"></script>
<script src="/static/bootstrap/js/bootstrap.min.js"></script>
<link href="/static/bootstrap-table/dist/extensions/group-by-v2/bootstrap-table-group-by.css" rel="stylesheet">
<link href="/static/bootstrap-table/dist/bootstrap-table.min.css" rel="stylesheet">
<link href="/static/css/tree.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/tableexport.jquery.plugin@1.10.21/tableExport.min.js"></script>
<script src="/static/bootstrap-table/dist/bootstrap-table.min.js"></script>
<script src="/static/bootstrap-table/dist/extensions/group-by-v2/bootstrap-table-group-by.min.js"></script>
<script src="static/bootstrap-table/dist/extensions/export/bootstrap-table-export.js"></script>
<script src="static/bootstrap-table/dist/locale/bootstrap-table-zh-CN.js"></script>
<style>
[type="submit"] {
cursor: pointer;
}
</style>
</head>
<body>
<div class="row">
<div class="col-md-4">
<form action="/combine" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="file">上传周报Excel</label>
<input type="file" id="file" name="file" required accept=".xlsx" multiple>
<p class="help-block">可以同时选择多个Excel.</p>
</div>
<a href="/static/X项目周报.xlsx" >demo1下载</a>
<a href="/static/Y项目周报.xlsx" >demo2下载</a>
<a href="/static/Z项目周报.xlsx" >demo3下载</a>
<a href="/static/A项目周报.xlsx" >错误格式demo下载</a>
<div class="checkbox">
<label class="radio-inline">
<input type="radio" name="mode" id="preview" required value="preview">
预览sheets</label>
<label class="radio-inline">
<input type="radio" name="mode" id="first" required value="first">
使用第一个sheet</label>
<label class="radio-inline">
<input type="radio" name="mode" id="last" required value="last">
使用最后一个sheet</label>
{% if sheetDict %}
<label class="radio-inline" style="float: right;">
<input type="radio" name="mode" id="custom" required value="custom">
自定义在右边树中选择需要合并的sheet</label>
{% end %}
</div>
<input type="hidden" name="sheets" id="sheets">
<button type="submit" class="btn btn-primary">提交</button>
<a href="/dashboard" class="btn btn-default">返回</a>
</form>
{% if errorDict %}
<table class="table table-hover">
<thead>
<tr>
<th>出错文件名称</th>
<th>sheet名称</th>
</tr>
</thead>
<tbody>
{% set fail=False %}
{% for excel,sheet_dict in errorDict.items() %}
{% for sheet,failure in sheet_dict.items() %}
{% if failure %}
{% set fail=True %}
<tr>
<td>{{excel}}</td>
<td>{{sheet}}</td>
</tr>
{% end %}
{% end %}
{% end %}
</tbody>
</table>
{% if not fail %}
<h2>全部文件检查后,格式都正确</h2>
{% end %}
{% end %}
</div>
<div class="col-md-4">
{% if sheetDict %}
<div class="tree">
{% end %}
</div>
</div>
<div class="col-md-4">
{% if not isinstance(df,None.__class__) %}
<table id="table" data-show-export="true" data-file-name="周报合并"
data-group-by="true"
data-group-by-field="项目名称"
>
<thead>
<tr>
<th data-field="项目名称">项目名称</th>
<th data-field="责任人">责任人</th>
<th data-field="实际工作量(h)">实际工作量(h)</th>
<th data-field="文件来源">文件来源</th>
<th data-field="sheet">sheet</th>
</tr>
</thead>
</table>
<input type="hidden" value="{{df.to_json(orient='records')}}" id="result">
{% end %}
{% import json %}
{% if sheetDict %}
<input type="hidden" value="{{json.dumps(sheetDict)}}" id="sheetDict">
{% end %}
</div>
</div>
<script src="/static/js/combine.js?v=2.0"></script>
</body>
</html>
页面JavaScript代码combine.js
$(function () {
if (!!$('#result')[0]) {
var data = JSON.parse($('#result').val());
$table = $('#table').bootstrapTable();
$table.bootstrapTable('destroy').bootstrapTable({
exportTypes: ['json', 'xml', 'csv', 'txt', 'sql', 'excel', 'pdf'],
exportOptions:{fileName:'合并周报'},
})
console.log(data);
$table.bootstrapTable('load', data);
}
if (!!$('#sheetDict')[0]) {
var sheetDict = JSON.parse($('#sheetDict').val());
var treeData = _.map(sheetDict, (v, k) => {
return {
"name": k,
"open": true,
"ico": 'iconfont icon-yonghu',
"cascade": true,
"id": k,
"checked": false,
"children": _.map(v, value => {
return {
"name": value,
"open": true,
"ico": 'iconfont icon-yonghu',
"id": "---" + k + '<>' + value,
"checked": false
}
})
}
})
var arrs = []
$(".tree").tree(treeData, arrs);
}
$('form').submit(function () {
var sheets = _.filter($.getCheckedNodes(), x => x.startsWith('---')).map(x=>x.replace('---',''));
console.log(sheets);
$('#sheets').val(sheets.join(','));
});
})
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。