# -*- coding: utf-8 -*-
# __author__:'Administrator'
# @Time : 2018/8/31 14:19
import os
dst = "D:\\test" # Directory under which the generated (split) files are written
# Split the contents of a txt file into one file per chapter, using "第…章" (chapter N) headings as boundaries
def SplitFile(file_path1, dst):
    """Split a text file into one output file per "第…章" chapter heading.

    The input is read as raw bytes. A line is treated as a chapter heading
    when, decoded as UTF-8, it contains "第" and also contains "章 " or
    "章...", or ends with "章" (ignoring the trailing line break). The lines
    that follow a heading are appended, byte-for-byte, to
    ``<dst>/<input name up to its first dot>/<n>_<heading>.txt`` where ``n``
    counts headings from 1. The heading line itself is not written, and
    lines that appear before the first heading are skipped.

    The output directory is always created, even when no heading is found.

    :param file_path1: path of the text file to split.
    :param dst: directory under which the per-file output directory is made.
    :raises IOError/OSError: if the input cannot be read or the output
        directory cannot be created. Failures to open or write a single
        chapter file are printed and that chapter is skipped.
    """
    with open(file_path1, 'rb') as f1:
        lines1 = f1.readlines()

    # Output directory is named after the input file (text before the first dot).
    file_dir1 = file_path1.replace("\\", '/').split("/")[-1].split(".")[0]
    path1 = os.path.join(dst, file_dir1)
    if not os.path.exists(path1):
        os.makedirs(path1)

    chapter_no = 0
    pending_path = None  # chapter file that still has to be opened
    out = None           # currently open chapter file, if any
    try:
        for line in lines1:
            # Decode once so the same text comparison works on Python 2 and 3.
            text = line.decode('utf8', 'replace')
            is_heading = u"第" in text and (
                u"章 " in text
                or u"章..." in text
                # Covers "章\r\n", "章\n" and a heading on the very last line.
                or text.rstrip(u"\r\n").endswith(u"章")
            )
            if is_heading:
                if out is not None:
                    out.close()
                    out = None
                chapter_no += 1
                name = line.strip().decode('utf8', 'replace')
                pending_path = os.path.join(
                    path1, u"%s_%s.txt" % (chapter_no, name))
                continue

            if out is None:
                if pending_path is None:
                    # Preamble before the first heading, or a chapter whose
                    # file could not be opened: nothing to write to.
                    continue
                try:
                    # Opened lazily so a heading without body lines creates
                    # no file; append mode keeps existing content.
                    out = open(pending_path, 'ab')
                except (IOError, OSError) as e:
                    print(e)
                    pending_path = None
                    continue
                pending_path = None

            try:
                out.write(line)
            except (IOError, OSError) as e:
                print(e)
    finally:
        if out is not None:
            out.close()
# Split the contents of a txt file at sequentially numbered headings (1, 2, 3, ...)
def SplitFile_by_Num(file_path2, dst):
    """Split a text file at lines that contain the next expected number.

    The input is read as raw bytes. A counter starts at 1; a line that
    contains the counter's decimal digits anywhere is treated as the next
    heading and the counter is incremented. The lines that follow a heading
    are appended, byte-for-byte, to
    ``<dst>/<input name up to its first dot>/<n>_<heading>.txt`` where ``n``
    is the number that was matched. The heading line itself is not written,
    and lines that appear before the first heading are skipped.

    The output directory is always created, even when no heading is found.

    :param file_path2: path of the text file to split.
    :param dst: directory under which the per-file output directory is made.
    :raises IOError/OSError: if the input cannot be read or the output
        directory cannot be created. Failures to open or write a single
        section file are printed and that section is skipped.
    """
    with open(file_path2, 'rb') as f2:
        lines2 = f2.readlines()

    # Output directory is named after the input file (text before the first dot).
    file_dir2 = file_path2.replace("\\", '/').split("/")[-1].split(".")[0]
    path2 = os.path.join(dst, file_dir2)
    if not os.path.exists(path2):
        os.makedirs(path2)

    i = 1                # next heading number to look for
    pending_path = None  # section file that still has to be opened
    out = None           # currently open section file, if any
    try:
        for line in lines2:
            # Compare bytes with bytes so this works on Python 2 and 3.
            marker = str(i).encode('ascii')
            # NOTE(review): the original also tested "<i>\r\n", "<i>、" and
            # "<i> ", but each of those implies this plain substring test, so
            # the accepted set of lines is unchanged. The test is loose (e.g.
            # "1" matches "21"); confirm whether anchoring was intended.
            if marker in line:
                if out is not None:
                    out.close()
                    out = None
                name = line.strip().decode('utf8', 'replace')
                pending_path = os.path.join(path2, u"%s_%s.txt" % (i, name))
                i += 1
                continue

            if out is None:
                if pending_path is None:
                    # Preamble before the first heading, or a section whose
                    # file could not be opened: nothing to write to.
                    continue
                try:
                    # Opened lazily so a heading without body lines creates
                    # no file; append mode keeps existing content.
                    out = open(pending_path, 'ab')
                except (IOError, OSError) as e:
                    print(e)
                    pending_path = None
                    continue
                pending_path = None

            try:
                out.write(line)
            except (IOError, OSError) as e:
                print(e)
    finally:
        if out is not None:
            out.close()
# Collect every txt file found under a directory (including subdirectories)
def get_all_txt(path):
    """Return the paths of all files under *path* whose name ends in ``.txt``.

    The directory tree is walked recursively with ``os.walk``. Each result is
    the walked directory joined with the file name. A *path* that yields
    nothing from ``os.walk`` (for example, one that does not exist) gives an
    empty list.

    :param path: root directory to search.
    :return: list of matching file paths, in ``os.walk`` order.
    """
    # Suffix test rather than substring test, so names such as
    # "notes.txt.bak" are not mistaken for text files.
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(path)
        for name in files
        if name.endswith('.txt')
    ]
if __name__ == "__main__":
    # Directory that is scanned for the txt files to split.
    file_dir = "D:\\xiaoshuo"
    file_paths = get_all_txt(unicode(file_dir, "utf8"))
    for one in file_paths:
        # First attempt: split on chapter headings.
        SplitFile(one, dst)

        # Directory SplitFile writes into: dst/<file name up to its first dot>.
        base_name = one.replace("\\", '/').split("/")[-1].split(".")[0]
        out_dir = os.path.join(dst, base_name)

        # Fall back to numeric splitting for every walked directory that
        # holds no files (i.e. the chapter split produced nothing there).
        for _root, _dirs, produced in os.walk(out_dir):
            if produced:
                continue
            SplitFile_by_Num(one, dst)