import re

# Literal patterns: findall returns every non-overlapping occurrence of the
# exact substring, in the order they appear in the text.
text = "你好wade3maimi,我是你的fans,你是永远的wadegreast3"
data_list1, data_list2 = (re.findall(word, text) for word in ("wade", "你好"))
print(data_list1)
print(data_list2)
输出:
['wade', 'wade']
['你好']
import re

# A character class such as [abc] matches exactly one character from the set.
text = "你好wade3maimi,我是gfdbc你的fans,abcfgh你是永远的wadegreast3cb"
data_list = re.compile("[abc]").findall(text)
print(data_list)
输出:
['a', 'a', 'b', 'c', 'a', 'a', 'b', 'c', 'a', 'a', 'c', 'b']
import re

# "q", then exactly one of a/b/c, then "w"; "qabcw" is skipped because three
# characters sit between q and w.
text = "你好qcwmaimi,我是qbw你的fansqabcw,abcfgh你是qaw永远的wadegreast3cb"
data_list = [hit.group() for hit in re.finditer("q[abc]w", text)]
print(data_list)
输出:
['qcw', 'qbw', 'qaw']
import re

# [a-z] is a range: "t" followed by any single lowercase ASCII letter.
text = "tahhhuutywkatbhjklntctz"
data_list = re.compile("t[a-z]").findall(text)
print(data_list)
输出:
['ta', 'ty', 'tb', 'tc', 'tz']
import re

# [0-9] is a digit range: "t" followed by exactly one digit.
text = "tahhhuutywkat79bhjklntctz"
data_list = [hit.group() for hit in re.finditer("t[0-9]", text)]
print(data_list)
输出:
['t7']
import re

# No "t" in this text is followed by a digit, so findall yields an empty list.
text = "tahhhuutywkatbhjklntctz"
data_list = re.compile("t[0-9]").findall(text)
print(data_list)
输出:
[]
import re

# \d matches a single digit. The pattern is a raw string so that the
# backslash reaches the regex engine instead of being treated as an
# (invalid) Python string escape.
text = "asd2wyt-yd3hj-jjd123"
data_list = re.findall(r"d\d", text)
print(data_list)
输出:
['d2', 'd3', 'd1']
import re

# "+" means one or more repetitions of the preceding item (here: digits).
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad3hdd9888j-jjd123"
data_list = re.findall(r"d\d+", text)
print(data_list)
输出:
['d3', 'd9888', 'd123']
import re

# "*" means zero or more repetitions, so a bare "d" also matches.
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad3hdd9888j-jjd123"
data_list = re.findall(r"d\d*", text)
print(data_list)
输出:
['d', 'd3', 'd', 'd9888', 'd123']
import re

# "?" means zero or one repetition: "d" optionally followed by one digit.
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad3hdd9888j-jjd123"
data_list = re.findall(r"d\d?", text)
print(data_list)
输出:
['d', 'd3', 'd', 'd9', 'd1']
import re

# {n} means exactly n repetitions: "d" followed by exactly two digits.
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad3hdd9888j-jjd123"
data_list = re.findall(r"d\d{2}", text)
print(data_list)
输出:
['d98', 'd12']
import re

# {n,} means n or more repetitions: "d" followed by at least two digits.
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad3hdd9888j-jjd123"
data_list = re.findall(r"d\d{2,}", text)
print(data_list)
输出:
['d9888', 'd123']
import re

# {n,m} means between n and m repetitions (inclusive): "d" followed by two
# to four digits. Matching is greedy, so "d123789" yields "d1237".
# Raw string avoids the invalid "\d" string escape.
text = "rodt-yad32hdd9888j-jjd123-jjd123789"
data_list = re.findall(r"d\d{2,4}", text)
print(data_list)
输出:
['d32', 'd9888', 'd123', 'd1237']
import re

# "d+" matches a run of one or more consecutive "d" characters.
text = "rodt-yad32hdd9888j-jjd123-jjd123789"
data_list = [hit.group() for hit in re.finditer("d+", text)]
print(data_list)
输出:
['d', 'd', 'dd', 'd', 'd']
import re

text = "韦德wade迈阿密 韦德3e 哈哈哈韦德maimie 韦德美国_e"
# Starts with "韦德", ends with "e", with one or more word characters in
# between. For str patterns \w covers letters, digits, underscore and also
# Chinese characters, but not spaces, so each space-separated chunk is
# matched on its own. Raw string avoids the invalid "\w" string escape.
data_list = re.findall(r"韦德\w+e", text)
print(data_list)
输出:
['韦德wade', '韦德3e', '韦德maimie', '韦德美国_e']
import re

text = "韦德wade迈阿密韦德3e哈哈哈韦德maimie韦德美国_e"
# Same pattern, but the text has no spaces: "+" is greedy by default, so
# \w+ consumes as much as possible and the match runs to the last "e".
# Raw string avoids the invalid "\w" string escape.
data_list = re.findall(r"韦德\w+e", text)
print(data_list)
输出:
['韦德wade迈阿密韦德3e哈哈哈韦德maimie韦德美国_e']
import re

text = "韦德wade迈阿密韦德3e哈哈哈韦德maimie韦德美国_e"
# "+?" is the non-greedy form: \w+? consumes as little as possible, so each
# match stops at the first "e" that completes the pattern.
# Raw string avoids the invalid "\w" string escape.
data_list = re.findall(r"韦德\w+?e", text)
print(data_list)
输出:
['韦德wade', '韦德3e', '韦德maimie', '韦德美国_e']
import re

# "." stands for any single character (other than a newline by default).
text = "rtyto-raoyuo-rboa"
data_list = re.compile("r.o").findall(text)
print(data_list)
输出:
['rao', 'rbo']
import re

# ".+" is one or more arbitrary characters; greedy by default, so the match
# extends to the last "o" in the text.
text = "rtyto-raoyuo-rboa"
data_list = [hit.group() for hit in re.finditer("r.+o", text)]
print(data_list)
输出:
['rtyto-raoyuo-rbo']
import re

# ".+?" is the non-greedy variant: one or more arbitrary characters, but as
# few as possible, so each match ends at the nearest "o".
text = "rtyto-raoyuo-rboa"
data_list = re.compile("r.+?o").findall(text)
print(data_list)
输出:
['rtyto', 'rao', 'rbo']
import re

# \s matches exactly one whitespace character (a space here; tabs and
# newlines also count). Raw string avoids the invalid "\w"/"\s" escapes.
text = "root admin add admin"
data_list = re.findall(r"a\w+\s\w+", text)
print(data_list)
输出:
['admin add']
import re

# The text has TWO spaces between "admin" and "fdd". A single \s consumes
# only one whitespace character, and the following \w+ cannot match the
# second space, so nothing is found.
# Raw string avoids the invalid "\w"/"\s" string escapes.
text = "root admin  fdd admin"
data_list = re.findall(r"a\w+\s\w+", text)
print(data_list)
输出:
[]
import re

# The text has TWO spaces between "admin" and "fdd"; \s\s matches exactly
# two consecutive whitespace characters, so the pair is found.
# Raw string avoids the invalid "\w"/"\s" string escapes.
text = "root admin  fdd admin"
data_list = re.findall(r"a\w+\s\s\w+", text)
print(data_list)
输出:
['admin  fdd']
注意:量词(+、*、?、{n,m})默认是贪婪匹配;在量词后面加上 ?(例如 +?、*?)即为非贪婪匹配。
import re

text = "楼主手机13046788791,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# Match the literal prefix "130467" followed by exactly five digits.
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"130467\d{5}", text)
print(data_list)
输出:
['13046788791', '13046787654']
import re

text = "楼主手机13046788791,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# With one capturing group, findall returns only the text captured by the
# parentheses, not the whole match.
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"13046(7\d{5})", text)
print(data_list)
输出:
['788791', '787654']
import re

text = "楼主手机13046788791,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# With several capturing groups, findall returns a list of tuples, one
# tuple of captured groups per match.
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"(1\d{2})46(7\d{5})", text)
print(data_list)
输出:
[('130', '788791'), ('130', '787654')]
import re

text = "楼主手机13046root,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# "|" inside the group is alternation: after the prefix "13046" the group
# captures either 7\d{5} (a "7" plus five digits) or r\w+t (an "r", word
# characters, then a "t"). Only the captured part is returned.
# Raw string avoids the invalid "\d"/"\w" string escapes.
data_list = re.findall(r"13046(7\d{5}|r\w+t)", text)
print(data_list)
输出:
['root', '787654']
import re

text = "我的身份证号是360722199808073032,周杰伦的身份证号是45678919790921675X"
# Seventeen digits followed by one final character that is a digit or "X".
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"\d{17}[\dX]", text)
print(data_list)
输出:
['360722199808073032', '45678919790921675X']
import re

text = "我的身份证号是360722199808073032,周杰伦的身份证号是45678919790921675X"
# Skip the first six digits, then capture the next four digits (year) and
# the following two digits (month) as separate groups; the remaining five
# digits and the final digit-or-"X" are matched but not captured.
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"\d{6}(\d{4})(\d{2})\d{5}[\dX]", text)
print(data_list)
输出:
[('1998', '08'), ('1979', '09')]
import re

text = "楼主手机13046root,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# "." means any character, so a literal dot must be escaped as "\.".
# For str patterns \w also matches Chinese characters by default, which is
# why the Chinese prefix directly before each address is included.
# Raw string avoids the invalid "\w"/"\." string escapes.
data_list = re.findall(r"\w+@\w+\.\w+", text)
print(data_list)
输出:
['邮箱789987666@126.com', '邮箱是8976777@qq.com']
import re

text = "楼主手机13046root,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# With re.ASCII, \w is restricted to [a-zA-Z0-9_] and no longer matches
# Chinese characters, so only the address itself is returned.
# "." means any character, so a literal dot must be escaped as "\.".
# Raw string avoids the invalid "\w"/"\." string escapes.
data_list = re.findall(r"\w+@\w+\.\w+", text, flags=re.ASCII)
print(data_list)
输出:
['789987666@126.com', '8976777@qq.com']
import re

text = "楼主手机13046root,邮箱789987666@126.com;群主号码13046787654,邮箱是8976777@qq.com"
# Alternative to re.ASCII: spell out the allowed characters explicitly
# (ASCII letters, digits, underscore, hyphen) instead of using \w.
# "." means any character, so a literal dot must be escaped as "\.".
# Raw string avoids the invalid "\." string escape.
data_list = re.findall(r"[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+", text)
print(data_list)
输出:
['789987666@126.com', '8976777@qq.com']
import re

text = "换了斗地主,2B逗3B"
# findall scans the whole string and returns every "digit followed by B".
# Raw string avoids the invalid "\d" string escape.
data_list = re.findall(r"\dB", text)
print(data_list)
输出:
['2B', '3B']
import re

text = "换了斗地主,2B逗3B"
# re.match only tries the pattern at the very beginning of the string.
# The text does not start with a digit, so the result is None even though
# "2B" occurs later. Raw string avoids the invalid "\d" string escape.
data_list = re.match(r"\dB", text)
print(data_list)
输出:
None
import re

text = "2B逗3B"
# The text starts with "2B", so re.match succeeds and returns a Match
# object. Raw string avoids the invalid "\d" string escape.
data_list = re.match(r"\dB", text)
print(data_list)
# group() returns the substring that was matched.
content = data_list.group()
print(content)
输出:
<re.Match object; span=(0, 2), match='2B'>
2B
match拓展应用
import re


def is_valid_mobile(mobile):
    """Return True if *mobile* looks like a valid mobile number.

    Valid means: after stripping surrounding whitespace, the string is
    exactly 11 digits, starts with "1", and its second digit is 3-9.
    """
    # fullmatch requires the whole string to match, which replaces the
    # explicit "^...$" anchors. Raw string avoids the invalid "\d" escape.
    return re.fullmatch(r"1[3-9]\d{9}", mobile.strip()) is not None


# Prompt only when run as a script, so importing this code does not block
# waiting on stdin.
if __name__ == "__main__":
    mobile = input("请输入手机号:")
    mobile = mobile.strip()  # drop surrounding whitespace
    # 1. Check whether the mobile number is well-formed.
    result = is_valid_mobile(mobile)
    if result:
        print("格式正确")
    else:
        print("格式错误")
import re

text = "2B逗3B"
# re.search scans through the string and returns a Match object for the
# first location where the pattern matches (None if there is none).
# Raw string avoids the invalid "\d" string escape.
data_list = re.search(r"\dB", text)
print(data_list)
# group() returns the substring that was matched.
content = data_list.group()
print(content)
输出:
<re.Match object; span=(0, 2), match='2B'>
2B
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。