selenium爬虫 根据域名后缀查询网站数量

发布时间:2023-05-29 11:11:38 作者:laolao

本质是对百度“site:”搜索语法的自动化查询:依次搜索“site:域名后缀”,并读取结果页显示的收录数量。

import re
from selenium import webdriver

# Set of domain suffixes to query, one "site:<suffix>" search per entry.
# NOTE(review): the "...." is an elision from the original post, not valid
# Python -- replace it with the real list of suffixes before running.
a = {'org.do', ...., 'org.ua'}

import time

import pandas as pd
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# Automate Baidu "site:<suffix>" searches for every suffix in `a`, record the
# result-count text Baidu displays, and export the findings to an Excel file.
driver = webdriver.Chrome()
dir_ = {}  # suffix -> result-count text from the page, or 0 if it could not be read
d_ = []    # suffixes with fewer than 100 results, or whose count could not be read

try:
    driver.get("https://www.baidu.com/")

    for suffix in a:
        # Re-locate the search box on every iteration: a reference obtained
        # before a search can go stale if the page re-renders afterwards.
        p_input = driver.find_element(By.ID, 'kw')
        p_input.clear()
        p_input.send_keys('site:{}'.format(suffix))
        driver.find_element(By.ID, 'su').click()
        time.sleep(2)  # fixed pause to give the results page time to render

        try:
            text = driver.find_element(
                By.XPATH, '//*[@id="content_left"]/div[1]/div/p[1]/b'
            ).text
            # The count may contain separators, so findall yields several
            # digit groups; joining them rebuilds the full number.
            number = re.findall(r"\d+", text)
            if int(''.join(number)) < 100:
                d_.append(suffix)
            dir_[suffix] = text
        except (WebDriverException, ValueError):
            # WebDriverException is the base of Selenium's errors (e.g. the
            # count element is missing); ValueError means no digits were
            # found. Either way, keep going and record the suffix as 0.
            d_.append(suffix)
            dir_[suffix] = 0
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and the finally clause guarantees it runs even if a query fails.
    driver.quit()

print(d_)
print(dir_)

# One-row sheet: each suffix is a column, its recorded value the single cell.
pd.DataFrame(dir_, index=[0]).to_excel('2.xlsx', index=False)