from lxml import etree

# Load the local file. etree.parse() uses the XML parser unless another parser
# is passed, so 'bendi.html' must be well-formed markup.
tree = etree.parse('bendi.html')
print(tree)

# In XPath, '/' selects direct children and '//' selects descendants at any
# depth.
li = tree.xpath('//body/ul/li')
print(li)
print(len(li))

# Text of every <li> that carries an id attribute.
liid = tree.xpath('//body/ul/li[@id]/text()')
for i in liid:
    print(i)

# Text of the <li> whose id is "bj".
libj = tree.xpath('//body/ul/li[@id="bj"]/text()')
print(libj)