# -*- coding: utf-8 -*-
"""Clean a mailing list in three stages.

1. Exclusion: drop from the source list every line that also appears in the
   bounce list; the survivors are sorted and written to the first output.
2. De-duplication: drop repeated lines (first occurrence wins) and write the
   result to the second output.
3. Keyword filter: drop every line matching ``KEYWORD_PATTERN`` and write the
   result to the third output.

Lines are compared without their line terminator and every output line is
written with exactly one trailing newline, so an input whose last line has no
newline neither escapes the exclusion nor gets glued to the following record.
"""
import re

BOUNCE_LIST_PATH = 'd:/test/mail/bounce_list.txt'
SOURCE_LIST_PATH = 'd:/test/mail/828-820.txt'
EXCLUDED_OUTPUT_PATH = 'd:/test/mail/ok1'
UNIQUE_OUTPUT_PATH = 'd:/test/mail/ok-sort'
FILTERED_OUTPUT_PATH = 'd:/test/mail/ok-ok'

# NOTE(review): the dots are unescaped, so each one matches any character
# (e.g. "@oauthXYcom" matches too). Kept byte-for-byte to preserve the
# existing filter; confirm whether r'@oauth.*\.com$' was intended.
KEYWORD_PATTERN = '@oauth.*.com$'


def read_lines(path):
    """Return the lines of the text file at *path* without line terminators."""
    with open(path, 'r') as handle:
        return [line.rstrip('\r\n') for line in handle]


def write_lines(path, lines):
    """Write *lines* to *path*, one per line, each terminated by a newline."""
    with open(path, 'w') as handle:
        handle.writelines(line + '\n' for line in lines)


def exclude_lines(candidates, blocked):
    """Return *candidates*, sorted, minus every line present in *blocked*.

    Duplicates among the candidates are kept; they are removed by a later
    stage.
    """
    # A set makes each membership test O(1) instead of scanning a list.
    blocked_set = set(blocked)
    return [line for line in sorted(candidates) if line not in blocked_set]


def unique_lines(lines):
    """Return *lines* with repeats removed, keeping first occurrences in order."""
    seen = set()
    unique = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            unique.append(line)
    return unique


def drop_matching(lines, pattern=KEYWORD_PATTERN):
    """Return the lines in which the regular expression *pattern* is not found."""
    matcher = re.compile(pattern)  # compiled once, outside the loop
    return [line for line in lines if not matcher.search(line)]


def main(bounce_path=BOUNCE_LIST_PATH, source_path=SOURCE_LIST_PATH,
         excluded_out=EXCLUDED_OUTPUT_PATH, unique_out=UNIQUE_OUTPUT_PATH,
         filtered_out=FILTERED_OUTPUT_PATH):
    """Run the three cleaning stages and return the final list of lines.

    Both inputs are read before any output file is opened, so a missing input
    raises without truncating results from a previous run. The lines kept by
    the first and the last stage are echoed to standard output.
    """
    bounced = read_lines(bounce_path)
    candidates = read_lines(source_path)

    # Stage 1: remove from the source list the entries found in the bounce list.
    kept = exclude_lines(candidates, bounced)
    for line in kept:
        print(line)
    write_lines(excluded_out, kept)

    # Stage 2: remove duplicated entries.
    deduped = unique_lines(kept)
    write_lines(unique_out, deduped)

    # Stage 3: remove entries matching the keyword pattern.
    remaining = drop_matching(deduped)
    for line in remaining:
        print(line)
    write_lines(filtered_out, remaining)

    return remaining


if __name__ == '__main__':
    main()