正则re模块
1. re模块
Python 中提供了 re 模块,可以使用正则表达式对文本进行匹配、提取、替换和分割等处理。
-
findall,以列表形式获取匹配到的所有数据(正则中包含多个分组时,列表中的每个元素是由各分组内容组成的元组)
import re

# Sample text with two 18-character sequences embedded between letters;
# the second sequence ends in "X" instead of a digit.
text = "dsf130429191912015219k13042919591219521Xkk"

# Six capture groups: 6, 4, 2, 2 and 3 digits, then one final character
# that is either a digit or "X".
# The pattern is a raw string so that "\d" reaches the regex engine as-is
# instead of being parsed as an (invalid) string escape sequence.
# Because the pattern contains several groups, findall returns one tuple
# of group values per match rather than the whole matched substring.
data_list = re.findall(r"(\d{6})(\d{4})(\d{2})(\d{2})(\d{3})([0-9]|X)", text)
print(data_list)
# [('130429', '1919', '12', '01', '521', '9'), ('130429', '1959', '12', '19', '521', 'X')]
-
match,从字符串的起始位置开始匹配,匹配成功返回一个 Match 对象,未匹配成功返回 None
import re

text = "大小逗2B最逗3B欢乐"

# re.match only tries the pattern at the very start of the string. This text
# begins with "大小", so the pattern cannot match there and the result is None.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.match(r"逗\dB", text)
print(data)  # None
import re

text = "逗2B最逗3B欢乐"

# The text starts with "逗2B", so matching from the start succeeds.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.match(r"逗\dB", text)

# re.match returns None when nothing matches, so check before calling group().
if data:
    content = data.group()  # "逗2B"
    print(content)
-
search,扫描整个字符串并返回第一个匹配成功的 Match 对象,未匹配成功返回 None
import re

text = "大小逗2B最逗3B欢乐"

# Unlike re.match, re.search scans the whole string and stops at the first
# position where the pattern matches.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.search(r"逗\dB", text)

# re.search returns None when nothing matches, so check before calling group().
if data:
    print(data.group())  # "逗2B"
-
sub,替换匹配成功的位置
import re

text = "逗2B最逗3B欢乐"

# Replace every "<digit>B" occurrence with the replacement string.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.sub(r"\dB", "沙雕", text)
print(data)  # 逗沙雕最逗沙雕欢乐
import re

text = "逗2B最逗3B欢乐"

# Replace only the first "<digit>B" occurrence.
# count is passed by keyword: passing it positionally is deprecated since
# Python 3.13 and is easy to confuse with the flags argument.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.sub(r"\dB", "沙雕", text, count=1)
print(data)  # 逗沙雕最逗3B欢乐
-
split,根据匹配成功的位置分割
import re

text = "逗2B最逗3B欢乐"

# Split the text at every "<digit>B" occurrence; the matched separators
# themselves are not included in the result.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.split(r"\dB", text)
print(data)  # ['逗', '最逗', '欢乐']
import re

text = "逗2B最逗3B欢乐"

# Split at the first "<digit>B" occurrence only; the rest of the string is
# returned unsplit as the last element.
# maxsplit is passed by keyword: passing it positionally is deprecated since
# Python 3.13 and is easy to confuse with the flags argument.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.split(r"\dB", text, maxsplit=1)
print(data)  # ['逗', '最逗3B欢乐']
-
finditer,返回一个迭代器,迭代时逐个得到每次匹配成功的 Match 对象
import re

text = "逗2B最逗3B欢乐"

# finditer returns an iterator of match objects instead of building a list.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.finditer(r"\dB", text)

# Prints each matched substring in order: "2B", then "3B".
for item in data:
    print(item.group())
import re

text = "逗2B最逗3B欢乐"

# (?P<xx>...) is a named group: the text it captures is available under the
# key "xx" in each match's groupdict().
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data = re.finditer(r"(?P<xx>\dB)", text)

# Prints {'xx': '2B'}, then {'xx': '3B'}.
for item in data:
    print(item.groupdict())
import re

# Sample text with two 18-character sequences embedded between letters;
# the second sequence ends in "X" instead of a digit.
text = "dsf130429191912015219k13042919591219521Xkk"

# Layout: 6 digits, then the named groups year (4 digits), month (2 digits)
# and day (2 digits), then 3 digits and a final character.
# The final class is [\dX] (a digit or "X"). Inside a character class "|" is
# a literal character, so the previous [\d|X] also accepted "|" as the last
# character.
# Raw string: keeps "\d" from being parsed as an invalid string escape.
data_list = re.finditer(
    r"\d{6}(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})\d{3}[\dX]",
    text,
)

# Prints {'year': '1919', 'month': '12', 'day': '01'},
# then {'year': '1959', 'month': '12', 'day': '19'}.
for item in data_list:
    info_dict = item.groupdict()
    print(info_dict)