python html 今日概念资金流入前20名

发布时间: 2023-10-30 09:16:08  作者: A汉克先生

#!/usr/bin/env python3
# coding=utf-8

from selenium import webdriver # 导入模块
import time
from lxml import etree # lxml库是一个HTML、XML的解析器
import tushare as ts
import requests, json
import threading
import random

def get_init_driver():
    """Start a headless Chrome WebDriver and print how long start-up took.

    Returns:
        The ``webdriver.Chrome`` instance created with the headless option.
    """
    started_at = time.time()

    # ChromeOptions carries the start-up arguments handed to the browser.
    chrome_options = webdriver.ChromeOptions()
    # 'headless' keeps the browser in the background, with no visible window.
    chrome_options.add_argument('headless')
    browser = webdriver.Chrome(options=chrome_options)

    elapsed = time.time() - started_at
    print("初始化driver花时:% f 秒" % float(elapsed))
    return browser


# Shut the driver down
def get_close_driver(driver):
    """End the WebDriver session that belongs to *driver*.

    ``quit()`` is used instead of ``close()``: per the Selenium
    documentation, ``close()`` only closes the current window, whereas
    ``quit()`` closes every window and ends the driver session, so no
    browser/driver process is left running after the scrape.

    Args:
        driver: A WebDriver-like object exposing ``quit()``.

    Returns:
        None.
    """
    driver.quit()


# Industry sectors with the largest capital inflow today (top rows of the table)
# 5-day / 10-day industry inflow: https://data.eastmoney.com/bkzj/hy.html?stat=10
# http://quote.eastmoney.com/center/boardlist.html#industry_board
# https://data.eastmoney.com/bkzj/hy.html
def get_today_industry_hy_html(driver, number):
    """Scrape industry names from the eastmoney industry fund-flow page.

    The page is loaded through *driver*, the first table inside the
    ``dataview-body`` container is parsed, and the text of the second cell
    of each selected row is collected.  The result is then merged with two
    ``get_today_industry_hy_api(6)`` results and de-duplicated.

    Args:
        driver: WebDriver used to fetch the page.
        number: Exclusive upper bound on the 1-based row number; rows
            3 .. number-1 are read (rows 1 and 2 are always skipped).

    Returns:
        A list of unique strings.  Order is not preserved because the
        de-duplication goes through ``set``.

    Raises:
        IndexError: If the page contains no matching table.
    """
    began = time.time()
    driver.get("https://data.eastmoney.com/bkzj/hy.html")
    page = etree.HTML(driver.page_source)
    board_table = page.xpath("//div[@class='dataview-body']/table")[0]

    names = []
    for row_no, row in enumerate(board_table.xpath('.//tr'), start=1):
        # Skip rows 1-2 and everything from row `number` onwards.
        if row_no <= 2 or row_no >= number:
            continue
        # Only the second child of the row is of interest.
        for cell in list(row)[1:2]:
            names.extend(piece.strip(" ") for piece in cell.xpath(".//text()"))

    # The timer stops here, so the API calls below are not included in it.
    finished = time.time()

    # NOTE(review): the original names these results "5" and "10" (days), yet
    # both calls pass the same argument 6 -- confirm whether the second call
    # was meant to differ.
    names.extend(get_today_industry_hy_api(6))
    names.extend(get_today_industry_hy_api(6))

    unique_names = list(set(names))
    print("查询实时行业(6倍数据更新一次)花时:% .3f 秒" % float(finished - began))
    return unique_names


# Concept boards with the largest capital inflow today (top rows of the table)
# http://quote.eastmoney.com/center/boardlist.html#concept_board
def get_now_gn_html(driver, number):
    """Scrape concept-board names from the eastmoney concept fund-flow page.

    The page is loaded through *driver*, the first table inside the
    ``dataview-body`` container is parsed, and the text of the second cell
    of each selected row is collected.  A few alias names are appended, the
    list is merged with two ``get_now_gn_api_type(6, 'f3')`` results,
    filtered through ``basic_rule_1.is_array_not_in_array`` and finally
    de-duplicated.

    Args:
        driver: WebDriver used to fetch the page.
        number: Exclusive upper bound on the 1-based row number; rows
            3 .. number-1 are read (rows 1 and 2 are always skipped).

    Returns:
        A list of unique strings.  Order is not preserved because the
        de-duplication goes through ``set``.

    Raises:
        IndexError: If the page contains no matching table.
    """
    driver.get("https://data.eastmoney.com/bkzj/gn.html")
    page = etree.HTML(driver.page_source)
    board_table = page.xpath("//div[@class='dataview-body']/table")[0]

    onetable = []
    for row_no, row in enumerate(board_table.xpath('.//tr'), start=1):
        # Skip rows 1-2 and everything from row `number` onwards.
        if row_no <= 2 or row_no >= number:
            continue
        # Only the second child of the row is of interest.
        for cell in list(row)[1:2]:
            onetable.extend(piece.strip(" ") for piece in cell.xpath(".//text()"))

    # NOTE(review): the source was flattened onto one line; these checks are
    # assumed to run once, after the table scan -- confirm against the
    # original script.  They append extra spellings of a concept when a
    # related name is present (presumably aliases used by other data sources).
    if basic_rule_1.is_string_in_array(onetable, "CPO"):
        onetable.append("共封装光学(CPO)")
        onetable.append("CPO概念")
    if basic_rule_1.is_string_in_array(onetable, "算力概念"):
        onetable.append("东数西算(算力)")
    if basic_rule_1.is_string_in_array(onetable, "光通信模块"):
        onetable.append("共封装光学(CPO)")

    # Merge the "5-day" and "10-day" concept lists.
    # Original author's note on the type codes: f62 = current, f164 = last
    # 5 days, f3 = price change.
    # NOTE(review): both calls pass identical arguments (6, 'f3') -- confirm
    # whether different type codes were intended.
    onetable.extend(get_now_gn_api_type(6, 'f3'))
    onetable.extend(get_now_gn_api_type(6, 'f3'))

    # Original comment: "remove limit-up concept data"; the filtering itself
    # is done by the helper, whose rules are not visible here.
    onetable = basic_rule_1.is_array_not_in_array(onetable, None)
    return list(set(onetable))


def get_now_gn_df_html(driver, ts_code):
    """Fetch the concept list of one stock from its eastmoney F10 page.

    The lookup code is built from *ts_code* by putting characters 7-8 in
    front of the first six characters (a value shaped like ``"000001.SZ"``
    becomes ``"SZ000001"``).  The tenth ``<p>`` text node of the page is
    split on spaces to obtain the concepts.  When that text is unavailable
    the function falls back to ``get_stock_gn(ts_code)``.

    Args:
        driver: WebDriver used to fetch the page.
        ts_code: Stock code string; sliced as described above.

    Returns:
        Whatever ``basic_rule_1.is_array_not_in_array`` returns for the
        collected concept list.
    """
    page_code = ts_code[7:9] + ts_code[0:6]
    driver.get(
        "https://emweb.securities.eastmoney.com/PC_HSF10/CoreConception/Index?type=web&code=" + page_code)
    page = etree.HTML(driver.page_source)
    summary_blocks = page.xpath("//div[@class='summary']")

    onetable = []
    # Guard against a missing summary block so that the Tushare fallback
    # below is reached instead of failing on summary_blocks[0].
    if summary_blocks:
        # NOTE(review): "//p/text()" is an absolute XPath, so it matches <p>
        # text in the whole document, not only inside the summary block.
        # It is kept as-is because the fixed position (10th node) depends on it.
        paragraphs = summary_blocks[0].xpath("//p/text()")
        if len(paragraphs) >= 10:
            onetable = paragraphs[9].strip(" ").strip('\n').split(" ")

    if not onetable:
        print("在东方财富网没有到到概念数据,在Tushare查询")
        onetable = get_stock_gn(ts_code)

    # Original comment: "remove limit-up concept data"; the filtering itself
    # is done by the helper, whose rules are not visible here.
    onetable = basic_rule_1.is_array_not_in_array(onetable, None)
    return onetable