返回列表 发帖

正则表达式

操作系统的版本信息:
[root@centos6 ~]# cat /etc/redhat-release
CentOS release 6.9 (Final)
[root@centos6 ~]#
[root@centos6 ~]# uname -r
2.6.32-696.el6.x86_64

Python的版本信息:
[root@centos6 ~]# python3 --version
Python 3.6.8





######

例子一:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 1: extract the director's name from a <p class="name"> element.
import re

# Markup to search.
Key_1 = '<p class="name">导演: 李大杰</p>'

# The pattern has one capture group (the text between "导演: " and "</p>"),
# so findall() yields a list of the captured strings.
Result_1 = re.compile('<p.*?class="name">导演: (.*?)</p>.*?')
Result_2 = Result_1.findall(Key_1)

# Show the matches first, then the type of the container holding them.
for item in (Result_2, type(Result_2)):
    print(item)
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
['李大杰']
<class 'list'>
[root@Zabbix_server_01 ~]#





######

例子二:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 2: same extraction as before, but print only the first match
# instead of the whole result list.
import re

Key_1 = '<p class="name">导演1: 李大杰</p>'
Result_1 = re.compile('<p.*?class="name">导演1: (.*?)</p>.*?')
Result_2 = Result_1.findall(Key_1)

# Index 0 selects the first captured string out of the result list.
first_hit = Result_2[0]
print(first_hit, type(first_hit), sep='\n')
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
李大杰
<class 'str'>
[root@Zabbix_server_01 ~]#





######

例子三:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 3: a looser pattern -- only "<p class" is spelled out, and a lazy
# wildcard skips everything up to the "导演1: " label.
import re

Key_1 = '<p class="name">导演1: 李大杰</p>add'
Result_1 = re.compile('<p class.*?导演1: (.*?)</p>.*?')
Result_2 = Result_1.findall(Key_1)

director = Result_2[0]  # first captured string
print(director)
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
李大杰
[root@Zabbix_server_01 ~]#





######

例子四:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 4: lazy wildcards on both sides of "class=" -- between "<p" and
# the attribute, and between the attribute and the "导演1: " label.
import re

Key_1 = '<p class="name">导演1: 李大杰</p>add'

Result_1 = re.compile('<p.*?class=.*?导演1: (.*?)</p>.*?')
Result_2 = Result_1.findall(Key_1)

# Only the first captured string is printed.
captured = Result_2[0]
print(captured)
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
李大杰
[root@Zabbix_server_01 ~]#





######

例子五:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 5: the pattern continues past the closing tag -- after "</p" a
# lazy wildcard runs on until the "ad" that follows the element.
import re

Key_1 = '<p class="name">导演1: 李大杰</p>add'

Result_1 = re.compile('<p.*?class=.*?导演1: (.*?)</p.*?ad.*?')
Result_2 = Result_1.findall(Key_1)

# The text after the capture group is matched but not captured, so the
# result still holds only the name.
name_only = Result_2[0]
print(name_only)
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
李大杰
[root@Zabbix_server_01 ~]#





######

例子六:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 6: case-insensitive matching -- the pattern spells "Class" and
# "</P>" with capitals while the input uses lower case.
import re

Key_1 = '<p class="name">导演=zhuohua</p>add'

# re.IGNORECASE (short form: re.I) ignores the case of English letters.
Result_1 = re.compile('<p.*?Class="name">导演=(.*?)</P>.*?', flags=re.IGNORECASE)
Result_2 = Result_1.findall(Key_1)

value = Result_2[0]  # first captured string
print(value)
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子七:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 7: capture the name that sits between "导演= " and the site name
# in 《》 brackets.
import re

Key_1 = '导演= zhuohua 《blog.zhuohua.store》'

# Mind the spaces: the pattern reproduces the blank before and after the
# captured name.
# The dots of the host name are escaped (\.) so they match a literal "."
# only; an unescaped "." matches any character. The raw string keeps the
# backslashes intact for the regex engine.
Result_1 = re.compile(r'.*?导演= (.*?) 《blog\.zhuohua\.store》.*?')
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子八:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 8: no space between the name and the opening 《 bracket, and the
# pattern stops after "blog.zhuohua" instead of spelling out the full host.
import re

Key_1 = '导演= zhuohua《blog.zhuohua.store》'

# The dot is escaped (\.) so it matches a literal "." only; an unescaped
# "." matches any character. The raw string keeps the backslash intact for
# the regex engine.
Result_1 = re.compile(r'.*?导演= (.*?)《blog\.zhuohua.*?')
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子九:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 9: the site name is wrapped in double quotes; inside a
# single-quoted Python string they need no escaping.
import re

Key_1 = '导演= zhuohua "blog.zhuohua.store"'

# The dot is escaped (\.) so it matches a literal "." only; an unescaped
# "." matches any character. The raw string keeps the backslash intact for
# the regex engine.
Result_1 = re.compile(r'.*?导演= (.*?) "blog\.zhuohua.*?')
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子十:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 10: the site name is wrapped in single quotes while the Python
# strings themselves are single-quoted too.
import re

Key_1 = '导演= zhuohua \'blog.zhuohua.store\''

# The escape character "\" is required here: \' puts a literal single quote
# into the single-quoted Python string.
# "\\." places the two characters \. into the pattern, so the regex matches
# a literal dot only; an unescaped "." matches any character.
Result_1 = re.compile('.*?导演= (.*?) \'blog\\.zhuohua.*?')
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子十一:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 11: same input as the single-quote case, but the Python strings
# are double-quoted, so the inner single quotes need no escaping.
import re

Key_1 = "导演= zhuohua 'blog.zhuohua.store'"

# The dot is escaped (\.) so it matches a literal "." only; an unescaped
# "." matches any character. The raw string keeps the backslash intact for
# the regex engine.
Result_1 = re.compile(r".*?导演= (.*?) 'blog\.zhuohua.*?")
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子十二:
筛选出“导演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 12: the site name is wrapped in parentheses, which are regex
# metacharacters.
import re

Key_1 = '导演= zhuohua(blog.zhuohua.store)'

# The escape character "\" is required here: \( and \) match literal
# parentheses instead of opening a capture group, and \. matches a literal
# dot instead of any character.
# A raw string is used because "\(" is not a valid escape sequence in an
# ordinary Python string literal and triggers a warning on Python 3.6+.
Result_1 = re.compile(r'.*?导演= (.*?)\(blog\.zhuohua\.store\).*?')
Result_2 = Result_1.findall(Key_1)
print(Result_2[0])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
zhuohua
[root@Zabbix_server_01 ~]#





######

例子十三:
筛选出“导演”、“主演”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 13: capture two fields -- director and leading actors -- from one
# <p class="info"> element.
import re

Key_1 = ('<p class="info">导演: 陈凯歌 Kaige Chen&nbsp;&nbsp;&nbsp;'
         '主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha...<br>'
         '1993&nbsp;/&nbsp;中国大陆 香港&nbsp;/&nbsp;剧情 爱情</p >')

# Two capture groups, so findall() returns a list of 2-tuples.
# The truncation marker "..." in front of <br> is escaped (\.\.\.) so that
# it matches three literal dots; unescaped, it would match any three
# characters. Raw strings keep the backslashes intact for the regex engine.
Result_1 = re.compile(
    r'<p.*?class="info">导演: (.*?)&nbsp;.*?'
    r'&nbsp;&nbsp;主演: .*?(.*?)\.\.\.<br>.*?'
)

Result_2 = Result_1.findall(Key_1)
print(Result_2)
print(type(Result_2))

print("-" * 10)

# Result_2[0] is the tuple for the first match: (director, actors).
print(Result_2[0][0])
print(Result_2[0][1])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
[('陈凯歌 Kaige Chen', '张国荣 Leslie Cheung / 张丰毅 Fengyi Zha')]
<class 'list'>
----------
陈凯歌 Kaige Chen
张国荣 Leslie Cheung / 张丰毅 Fengyi Zha
[root@Zabbix_server_01 ~]#





######

例子十四:
筛选出“导演”、“主演”、“年份”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 14: capture three fields -- director, leading actors and year --
# from one <p class="info"> element.
import re

Key_1 = ('<p class="info">导演: 陈凯歌 Kaige Chen&nbsp;&nbsp;&nbsp;'
         '主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha...<br>'
         '1993&nbsp;/&nbsp;中国大陆 香港&nbsp;/&nbsp;剧情 爱情</p >')

# Three capture groups, so findall() returns a list of 3-tuples.
# The truncation marker "..." in front of <br> is escaped (\.\.\.) so that
# it matches three literal dots; unescaped, it would match any three
# characters. Raw strings keep the backslashes intact for the regex engine.
Result_1 = re.compile(
    r'<p.*?class="info">导演: (.*?)&nbsp;.*?'
    r'&nbsp;&nbsp;主演: .*?(.*?)\.\.\.<br>.*?'
    r'(.*?)&nbsp;/&nbsp;.*?'
)

Result_2 = Result_1.findall(Key_1)
print(Result_2)
print(type(Result_2))

print("-" * 10)

# Result_2[0] is the tuple for the first match: (director, actors, year).
print(Result_2[0][0])
print(Result_2[0][1])

print("-" * 10)

# The year is captured as text, not as a number.
print(Result_2[0][2])
print(type(Result_2[0][2]))
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
[('陈凯歌 Kaige Chen', '张国荣 Leslie Cheung / 张丰毅 Fengyi Zha', '1993')]
<class 'list'>
----------
陈凯歌 Kaige Chen
张国荣 Leslie Cheung / 张丰毅 Fengyi Zha
----------
1993
<class 'str'>
[root@Zabbix_server_01 ~]#





######

例子十五:
筛选出“导演”、“主演”、“年份”、“产地”、“类型”:
[root@Zabbix_server_01 ~]# cat xx.py
#coding=utf-8
# Example 15: capture five fields -- director, leading actors, year,
# region and genre -- from one <p class="info"> element.
import re

Key_1 = ('<p class="info">导演: 陈凯歌 Kaige Chen&nbsp;&nbsp;&nbsp;'
         '主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha...<br>'
         '1993&nbsp;/&nbsp;中国大陆 香港&nbsp;/&nbsp;剧情 爱情</p >')

# Five capture groups, so findall() returns a list of 5-tuples.
# The truncation marker "..." in front of <br> is escaped (\.\.\.) so that
# it matches three literal dots; unescaped, it would match any three
# characters. Raw strings keep the backslashes intact for the regex engine.
Result_1 = re.compile(
    r'<p.*?class="info">导演: (.*?)&nbsp;.*?'
    r'&nbsp;&nbsp;主演: .*?(.*?)\.\.\.<br>.*?'
    r'(.*?)&nbsp;/&nbsp;.*?'
    r'(.*?)&nbsp;/&nbsp;.*?'
    r'(.*?)</p >.*?'
)

Result_2 = Result_1.findall(Key_1)
print(Result_2)
print(type(Result_2))

print("-" * 10)

# Result_2[0] is the tuple for the first match, in pattern order:
# director, actors, year, region, genre.
print(Result_2[0][0])
print(Result_2[0][1])

print(Result_2[0][2])
print(Result_2[0][3])
print(Result_2[0][4])
[root@Zabbix_server_01 ~]#


脚本运行的结果:
[root@Zabbix_server_01 ~]# python3 xx.py
[('陈凯歌 Kaige Chen', '张国荣 Leslie Cheung / 张丰毅 Fengyi Zha', '1993', '中国大陆 香港', '剧情 爱情')]
<class 'list'>
----------
陈凯歌 Kaige Chen
张国荣 Leslie Cheung / 张丰毅 Fengyi Zha
1993
中国大陆 香港
剧情 爱情
[root@Zabbix_server_01 ~]#





相关文章:
网络爬虫_爬(word.zhuohua.store)
网络爬虫_爬(豆瓣电影)
网络爬虫_爬(电影天堂)

Python3命令集
Python3检测文件内容中是否包含关键字

返回列表