OpenPyXL 是个读写 Excel 2010 xlsx/xlsm/xltx/xltm 的 Python 库,简单易用,功能广泛,单元格格式/图片/表格/公式/筛选/批注/文件保护等等功能应有尽有,图表功能是其一大亮点。
OpenPyXL 几乎可以实现所有的 Excel 功能,而且接口清晰,文档丰富,学习成本相对较低。
import openpyxl
import os
class ExcelObject:
# 初始化文件
def __init__(self, filename='', sheetname='',sheettitleflag=False,sheettitle=[],readwriteflag="r"):
'''
:param filename:excel文件名称
:param sheetname:sheet的名称
:param sheettitleflag:sheet表的表头标志
如果sheettitleflag为False,则用title作为sheet表头
否则用sheet表第一行做表头
:param sheettitle: 指定的sheet表头
:param readwriteflag: 指定读写状态
r 读取sheet
w 写入新表
a 追加sheet
:return:
'''
self.filename = filename
self.sheetname = sheetname
self.sheettitleflag = sheettitleflag
self.sheettitle=sheettitle
self.readwriteflag=readwriteflag
self.recordnumber=0
self.invalidflag=False
self.sheetnames=[]
def open(self):
self.wb = openpyxl.load_workbook(self.filename)
if self.sheetname=='':
self.sheetnames=self.wb.sheetnames
else:
self.sh = self.wb[self.sheetname]
def readTitle(self):
self.open()
if self.readwriteflag!='r':pass
rows = list(self.sh.rows)
sheetdatarows=len(rows)
# 获取当前sheet记录数
# 有标题标志的要-1
# 无标题标志的正常
# 对于置为有标题标志,如果实际行数为0,则行数为-1
if self.sheettitleflag==False:
self.recordnumber=sheetdatarows-1
else:
self.recordnumber=sheetdatarows
titles = []
# 获取当前sheet的表头,只处理默认带标题标志的
# 如果记录行数>=0,则获取表头
# 如果记录行数=-1,则表头为[]
if self.sheettitleflag==False:
if self.recordnumber >=0:
for t in rows[0]:
title = t.value
titles.append(title)
self.sheettitle = titles
else:
self.sheettitle = []
# 将行数标准化,行数为-1,视为无行数
self.recordnumber = 0 if self.recordnumber == -1 else self.recordnumber
# 将错误表格更新无效标志
if self.recordnumber==0 and self.sheettitle==[]:
self.invalidflag=True
return self.sheettitle,self.recordnumber
def close(self):
self.wb.close()
def readData(self):
"""按行读取数据,最后返回一个存储字典的列表"""
self.open()
self.readTitle()
rows = list(self.sh.rows)
cases = []
if self.recordnumber==0:
self.close()
return cases
for row in rows[1:]:
case = []
for r in row:
case.append(r.value)
cases.append(dict(zip(self.sheettitle, case))) # 通过zip聚合打包用例的标题和数据
self.close()
return cases
def writeExcel(self,filename, sheetname,sheettitle,value):
'''
:param filename:excel名称
:param sheetname:sheet的名称
:param sheettitle:数据表头
:param value: 追加的数据
:return:
'''
wb = openpyxl.Workbook()
sh = wb.active
sh.title = sheetname
sh.append(sheettitle)
for ss in value:
sh.append(ss)
wb.save(filename)
print("写入数据成功!")
def addExcel(self,filename, sheetname,sheettitle,value):
'''
:param filename:excel名称
:param sheetname:sheet的名称
:param sheettitle:数据表头
:param value: 追加的数据
:return:
'''
wb = openpyxl.load_workbook(filename)
wb.create_sheet(sheetname)
sh = wb[sheetname]
sh.append(sheettitle)
for ss in value:
sh.append(ss)
wb.save(filename)
print("写入成功")
filename=r'C:\Users\baoqi\Documents\exceltest.xlsx'
sheetname1 = '有标题有数据'
sheetname2 = '无标题有数据'
sheetname3 = '有标题无数据'
sheetname4 = '无标题无数据'
sheetname5='有标题无数据,但实际无标题无数据'
sheet = ExcelObject(filename, sheetname1)
sheettitle,recordnumber = sheet.readTitle()
sheetdata=sheet.readData()
print(sheetname1,'sheet,标题为',sheettitle,'数据量为=',recordnumber)
print(sheetname1,'sheet,表数据为=',sheetdata)
sheet =ExcelObject(filename, sheetname2,sheettitleflag=True,sheettitle=['追加标题1','追加标题2','追加标题3','追加标题4'])
sheettitle,recordnumber = sheet.readTitle()
sheetdata=sheet.readData()
print(sheetname2,'sheet,标题为',sheettitle,'数据量为=',recordnumber)
print(sheetname2,'sheet,表数据为=',sheetdata)
sheet = ExcelObject(filename, sheetname3)
sheettitle,recordnumber = sheet.readTitle()
sheetdata=sheet.readData()
print(sheetname3,'sheet,标题为',sheettitle,'数据量为=',recordnumber)
print(sheetname3,'sheet,表数据为=',sheetdata)
sheet = ExcelObject(filename, sheetname4,sheettitleflag=True,sheettitle=['追加标题1','追加标题2','追加标题3','追加标题4'])
sheettitle,recordnumber = sheet.readTitle()
sheetdata=sheet.readData()
print(sheetname4,'sheet,标题为',sheettitle,'数据量为=',recordnumber)
print(sheetname4,'sheet,表数据为=',sheetdata)
sheet = ExcelObject(filename, sheetname5)
sheettitle,recordnumber = sheet.readTitle()
sheetdata=sheet.readData()
print(sheetname5,'sheet,标题为',sheettitle,'数据量为=',recordnumber)
print(sheetname5,'sheet,表数据为=',sheetdata)
writefile=r'C:\Users\baoqi\Documents\【广州】学习材料\业财一致性\统计.xlsx'
basepath = r'C:\Users\baoqi\Documents\【广州】学习材料\业财一致性'
writefile=ReadExcel()
data=[]
# 读取excel表的sheet列
for curpath in os.listdir(basepath):
for curfile in os.listdir(basepath+'\\'+curpath):
fullfile = basepath + '\\' + curpath + '\\' + curfile
sheet = ExcelObject(fullfile)
sheet.open()
print(curpath, ' ', curfile, ' ', sheet.sheetnames)
for curpath in os.listdir(basepath):
for curfile in os.listdir(basepath+'\\'+curpath):
fullfile=basepath+'\\'+curpath+'\\'+curfile
if curfile =='tableA.xlsx':
sheetnames = ['sheet0','sheet1']
elif curfile=='tableB.xlsx':
sheetnames = ['sheet2','sheet3']
elif curfile=='tableC.xlsx':
sheetnames = ['sheet4','sheet5']
elif curfile=='tableD.xlsx':
sheetnames = ['sheet0','sheet1', 'sheet2','sheet3']
if '~$' in curfile: continue
for sheetname in sheetnames:
# 读取excel表各sheet的数据量
sheet = ReadExcel(fullfile, sheetname)
sheettitle,recordnumber = sheet.readTitle()
print(curpath, ' ', curfile, ' ', sheetname,' ',recordnumber)
data.append([curpath,curfile,sheetname,recordnumber])
# 将数据写入excel的某个sheet
title=['供电局','表格','sheet名称','数量']
writefile.writeExcel(writefile, '汇总数据',title,data)
本文分享自 python与大数据分析 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!